def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
            )
        class Bunk:
            """Minimal readline-based stand-in for a file, with a .name."""
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                # Read from the instance's own state (self._lines), not the
                # enclosing-scope 'lines', so Bunk is self-contained.
                if self._index == len(self._lines):
                    raise StopIteration
                line = self._lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)
Beispiel #2
0
def read_py_file(filepath):
    """Return the decoded text of the Python source file *filepath*.

    On Python 2 the file is read as-is (universal newlines).  On Python 3
    the PEP 263 encoding declaration is validated first; any encoding
    problem is re-raised as CouldNotHandleEncoding.

    All file handles are closed before returning (the original leaked
    both the Python 2 handle and the tokenize.open() handle).
    """
    if sys.version_info < (3, ):
        # 'rU' keeps Python 2 universal-newline behaviour.
        with open(filepath, 'rU') as py2_file:
            return py2_file.read()
    else:
        # see https://docs.python.org/3/library/tokenize.html#tokenize.detect_encoding
        # first just see if the file is properly encoded
        try:
            with open(filepath, 'rb') as f:
                tokenize.detect_encoding(f.readline)
        except SyntaxError as err:
            # this warning is issued:
            #   (1) in badly authored files (contains non-utf8 in a comment line)
            #   (2) a coding is specified, but wrong and
            #   (3) no coding is specified, and the default
            #       'utf8' fails to decode.
            #   (4) the encoding specified by a pep263 declaration did not match
            #       with the encoding detected by inspecting the BOM
            raise CouldNotHandleEncoding(filepath, err)

        try:
            # tokenize.open() honours the declared encoding; close it when done.
            with tokenize.open(filepath) as source:
                return source.read()
            # this warning is issued:
            #   (1) if uft-8 is specified, but latin1 is used with something like \x0e9 appearing
            #       (see http://stackoverflow.com/a/5552623)
        except UnicodeDecodeError as err:
            raise CouldNotHandleEncoding(filepath, err)
Beispiel #3
0
 def update_fileinfo(cls, fileinfo, document=None):
     """Record the detected source encoding on *fileinfo*.

     When *document* is given, detection runs on its first 1024 chars;
     otherwise the file at fileinfo.fullpathname is probed directly
     (I/O errors are silently ignored).
     """
     import tokenize
     if document:
         payload = document.gettext(0, 1024).encode('utf-8', errors='ignore')
         stream = io.BytesIO(payload)
         detected, _ = tokenize.detect_encoding(stream.readline)
         fileinfo.encoding = detected
     else:
         try:
             with open(fileinfo.fullpathname, 'rb') as stream:
                 detected, _ = tokenize.detect_encoding(stream.readline)
                 fileinfo.encoding = detected
         except IOError:
             pass
Beispiel #4
0
    def patch(self, filename):
        """Apply every registered operation to *filename*.

        Returns True when the file content changed, False otherwise.
        The original on-disk encoding is preserved when rewriting.
        """
        self.current_file = filename

        with tokenize.open(filename) as fp:
            original = fp.read()

        patched = original
        for operation in self.operations:
            patched = operation.patch(patched)

        if patched == original:
            # Nothing changed: still run the check (and echo when asked).
            self.check(patched)
            if self.options.to_stdout:
                self.write_stdout(patched)
            return False

        # Re-detect the declared encoding so the rewrite matches it.
        with open(filename, "rb") as fp:
            encoding, _ = tokenize.detect_encoding(fp.readline)

        if not self.options.quiet:
            print("Patch %s" % filename)
        if self.options.to_stdout:
            self.write_stdout(patched)
        else:
            with open(filename, "w", encoding=encoding) as fp:
                fp.write(patched)
        self.check(patched)
        return True
Beispiel #5
0
def open_source_file(filename):
    """Open *filename* using its declared PEP 263 encoding.

    Returns a (stream, encoding, data) triple; the caller is responsible
    for closing the returned text stream.
    """
    # pylint: disable=consider-using-with
    with open(filename, "rb") as raw:
        encoding, _ = detect_encoding(raw.readline)
    text_stream = open(filename, newline=None, encoding=encoding)
    contents = text_stream.read()
    return text_stream, encoding, contents
def insert_suppressions(
    source: bytes,
    comments: Iterable[SuppressionComment],
    *,
    code_width: int = DEFAULT_CODE_WIDTH,
    min_comment_width: int = DEFAULT_MIN_COMMENT_WIDTH,
) -> InsertSuppressionsResult:
    """
    Given an iterable of `lines`, forms a new sequence of lines with `comments`
    inserted.

    The source's own encoding (PEP 263 cookie / BOM) is detected so inserted
    comment lines can be encoded to match.  Comments whose target line is
    never reached are returned as ``failed_insertions``.
    """
    # Encoding used when encoding the inserted comment lines back to bytes.
    encoding = tokenize.detect_encoding(BytesIO(source).readline)[0]
    tokens = tuple(tokenize.tokenize(BytesIO(source).readline))
    # Leading-whitespace string per physical line, derived from the tokens.
    indentations = _get_indentations(tokens)
    physical_to_logical = LineMappingInfo.compute(tokens=tokens).physical_to_logical
    comments_queue = deque(sorted(comments))  # sort by line number
    updated_lines = []

    for line_number, line_bytes in enumerate(BytesIO(source).readlines(), start=1):
        # Drain every queued comment that targets this physical line.
        while comments_queue:
            target_line = physical_to_logical[comments_queue[0].before_line]
            if target_line == line_number:
                indent = indentations[line_number]
                # Fit the comment within code_width, but never narrower
                # than min_comment_width.
                width = max(code_width - len(indent), min_comment_width)
                for line in comments_queue.popleft().to_lines(width):
                    updated_lines.append(f"{indent}{line}\n".encode(encoding))
            else:
                break
        updated_lines.append(line_bytes)

    return InsertSuppressionsResult(
        updated_source=b"".join(updated_lines), failed_insertions=tuple(comments_queue)
    )
Beispiel #7
0
def roundtrip(filename, output=sys.stdout):
    """Parse *filename* (honouring its encoding declaration) and unparse
    the resulting AST into *output*."""
    # tokenize.open() detects the PEP 263 encoding and opens accordingly,
    # matching the detect-then-reopen dance of the original.
    with tokenize.open(filename) as source_file:
        source = source_file.read()
    tree = compile(source, filename, "exec", ast.PyCF_ONLY_AST)
    Unparser(tree, output)
Beispiel #8
0
    def get_source(self, fullname):
        """Concrete implementation of InspectLoader.get_source.

        Returns the module source as text; raises ImportError when the
        bytes cannot be fetched, their encoding cannot be detected, or
        they cannot be decoded.
        """
        path = self.get_filename(fullname)
        try:
            source_bytes = self.get_data(path)
        except IOError:
            raise ImportError("source not available through get_data()")

        if py3k:
            import io, tokenize

            # detect_encoding reads at most the first two lines looking for
            # a PEP 263 coding cookie or a BOM.
            readsource = io.BytesIO(source_bytes).readline
            try:
                encoding = tokenize.detect_encoding(readsource)
            except SyntaxError as exc:
                raise ImportError("Failed to detect encoding")

            # Translate \r\n / \r line endings to \n.
            newline_decoder = io.IncrementalNewlineDecoder(None, True)
            try:
                # encoding is a (name, consumed_lines) tuple; [0] is the name.
                return newline_decoder.decode(source_bytes.decode(encoding[0]))
            except UnicodeDecodeError as exc:
                raise ImportError("Failed to decode source file")

        else:
            return source_bytes  # XXX proper encoding (Python 2: raw bytes)
def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, decoding it per its coding cookie.

    Parameters
    ----------
    url : str
      The URL from which to fetch the file.
    errors : str
      Decoding error handling, as accepted by bytes.decode(); 'replace'
      by default.
    skip_encoding_cookie : bool
      When True (the default), an encoding declaration found in the first
      two lines is dropped from the output - compiling a unicode string
      with an encoding declaration is a SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    payload = io.BytesIO(urllib.request.urlopen(url).read())
    encoding, _ = detect_encoding(payload.readline)
    payload.seek(0)
    wrapper = TextIOWrapper(payload, encoding, errors=errors,
                            line_buffering=True)
    wrapper.mode = 'r'
    if not skip_encoding_cookie:
        return wrapper.read()
    return "".join(strip_encoding_cookie(wrapper))
Beispiel #10
0
    def __init__(self,
                 source: IO,
                 modname: str,
                 srcname: str,
                 decoded: bool = False) -> None:
        """Cache the module source from *source*.

        When *decoded* is False (deprecated) the stream must yield bytes;
        the encoding is detected with tokenize and used to decode the
        whole read.  When True, the stream already yields text.
        """
        self.modname = modname  # name of the module
        self.srcname = srcname  # name of the source file

        # cache the source code as well
        pos = source.tell()
        if not decoded:
            warnings.warn('decode option for ModuleAnalyzer is deprecated.',
                          RemovedInSphinx40Warning,
                          stacklevel=2)
            # Detect the PEP 263 encoding, rewind, and decode everything.
            self._encoding, _ = tokenize.detect_encoding(source.readline)
            source.seek(pos)
            self.code = source.read().decode(self._encoding)
        else:
            # Text stream: no encoding bookkeeping needed.
            self._encoding = None
            self.code = source.read()

        # will be filled by parse()
        self.annotations = None  # type: Dict[Tuple[str, str], str]
        self.attr_docs = None  # type: Dict[Tuple[str, str], List[str]]
        self.finals = None  # type: List[str]
        self.overloads = None  # type: Dict[str, List[Signature]]
        self.tagorder = None  # type: Dict[str, int]
        self.tags = None  # type: Dict[str, Tuple[str, int, int]]
Beispiel #11
0
 def test_cookie_second_line_empty_first_line(self):
     """A vim coding cookie on line two is honoured when line one is blank."""
     source_lines = (b'\n', b'# vim: set fileencoding=iso8859-15 :\n',
                     b"print('\xe2\x82\xac')\n")
     encoding, consumed = detect_encoding(self.get_readline(source_lines))
     self.assertEqual(encoding, 'iso8859-15')
     # Both the blank line and the cookie line are consumed.
     self.assertEqual(consumed, list(source_lines[:2]))
Beispiel #12
0
 def test_matched_bom_and_cookie_second_line(self):
     """A BOM plus a utf-8 cookie on line two yields utf-8-sig."""
     source_lines = (b'\xef\xbb\xbf#! something\n', b'f# coding=utf-8\n',
                     b'print(something)\n', b'do_something(else)\n')
     encoding, consumed = detect_encoding(self.get_readline(source_lines))
     self.assertEqual(encoding, 'utf-8-sig')
     # The BOM is stripped from the first consumed line.
     self.assertEqual(consumed,
                      [b'#! something\n', b'f# coding=utf-8\n'])
Beispiel #13
0
 def test_cookie_second_line_no_bom(self):
     """Without a BOM, a fileencoding cookie on line two is used."""
     source_lines = (b'#! something\n', b'# vim: set fileencoding=ascii :\n',
                     b'print(something)\n', b'do_something(else)\n')
     encoding, consumed = detect_encoding(self.get_readline(source_lines))
     self.assertEqual(encoding, 'ascii')
     # The shebang line and the cookie line are both consumed.
     self.assertEqual(consumed, list(source_lines[:2]))
Beispiel #14
0
 def test_bom_no_cookie(self):
     """A UTF-8 BOM with no cookie selects utf-8-sig."""
     source_lines = (b'\xef\xbb\xbf# something\n', b'print(something)\n',
                     b'do_something(else)\n')
     encoding, consumed = detect_encoding(self.get_readline(source_lines))
     self.assertEqual(encoding, 'utf-8-sig')
     # The BOM is stripped from the first consumed line.
     self.assertEqual(consumed,
                      [b'# something\n', b'print(something)\n'])
Beispiel #15
0
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print statement.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file, ))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if (os.path.isdir(fullname) and not os.path.islink(fullname)
                    or os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    # Detect the declared source encoding before reopening as text.
    with open(file, 'rb') as f:
        encoding, lines = tokenize.detect_encoding(f.readline)

    try:
        f = open(file, encoding=encoding)
    except IOError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    # Ensure the text handle is closed on every path (the original
    # leaked it).
    with f:
        if verbose > 1:
            print("checking %r ..." % file)

        try:
            process_tokens(tokenize.generate_tokens(f.readline))

        except tokenize.TokenError as msg:
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except IndentationError as msg:
            errprint("%r: Indentation Error: %s" % (file, msg))
            return

        except NannyNag as nag:
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%r: *** Line %d: trouble in tab city! ***" %
                      (file, badline))
                print("offending line: %r" % (line, ))
                print(nag.get_msg())
            else:
                if ' ' in file: file = '"' + file + '"'
                if filename_only: print(file)
                else: print(file, badline, repr(line))
            return

        if verbose:
            print("%r: Clean bill of health." % (file, ))
Beispiel #16
0
def roundtrip(filename, output=sys.stdout):
    """Compile *filename* to an AST and write the unparsed source to *output*."""
    # First pass: detect the declared encoding from the raw bytes.
    with open(filename, "rb") as stream:
        enc, _ = tokenize.detect_encoding(stream.readline)
    # Second pass: read the text with that encoding.
    with open(filename, "r", encoding=enc) as stream:
        code = stream.read()
    Unparser(compile(code, filename, "exec", ast.PyCF_ONLY_AST), output)
Beispiel #17
0
def read_text_file(filename, encoding=None):
    """Read text file.

    Give back the contents, and the encoding we used.

    Unless specified manually, we have no way of knowing what text
    encoding this file may be in.

    The standard Python 'open' method uses the default system encoding
    to read text files in Python 3 or falls back to utf-8.

    On Python 3 we can use tokenize to detect the encoding.

    On Python 2 we can use chardet to detect the encoding.

    """
    # Only if the encoding is not manually specified, we may try to
    # detect it.
    if encoding is None and detect_encoding is not None:
        with open(filename, 'rb') as filehandler:
            encoding = detect_encoding(filehandler.readline)[0]

    with open(filename, 'rb') as filehandler:
        data = filehandler.read()

    if encoding is not None:
        return data.decode(encoding), encoding

    if HAVE_CHARDET:
        encoding_result = chardet.detect(data)
        if encoding_result and encoding_result['encoding'] is not None:
            encoding = encoding_result['encoding']
            return data.decode(encoding), encoding

    # Look for hints, PEP263-style
    if data[:3] == b'\xef\xbb\xbf':
        encoding = 'utf-8'
        return data.decode(encoding), encoding

    data_len = len(data)
    for canary in ENCODING_HINTS:
        if canary in data:
            pos = data.index(canary)
            # Use 1-byte slices, not indexing: on Python 3 indexing bytes
            # yields an int, which would never equal b' '/b'\n'/b'\r'
            # (the original was broken there on Python 3).
            if pos > 1 and data[pos - 1:pos] not in (b' ', b'\n', b'\r'):
                continue
            pos += len(canary)
            coding = b''
            while pos < data_len and data[pos:pos + 1] not in (b' ', b'\n'):
                coding += data[pos:pos + 1]
                pos += 1
            encoding = coding.decode('ascii').strip()
            try:
                return data.decode(encoding), encoding
            except (LookupError, UnicodeError):
                # Try the next one
                pass

    # Fall back to utf-8
    encoding = 'utf-8'
    return data.decode(encoding), encoding
Beispiel #18
0
def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Fetch a Python file from *url* and decode it via its coding cookie.

    Parameters
    ----------
    url : str
      The URL from which to fetch the file.
    errors : str
      How to handle decoding errors; any bytes.decode() mode, default
      'replace'.
    skip_encoding_cookie : bool
      If True (the default), an encoding declaration in the first two
      lines is excluded from the output - compiling a unicode string with
      an encoding declaration is a SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    response = urllib.request.urlopen(url)
    raw = io.BytesIO(response.read())
    detected, _ = detect_encoding(raw.readline)
    raw.seek(0)
    decoded = TextIOWrapper(raw, detected, errors=errors,
                            line_buffering=True)
    decoded.mode = 'r'
    if skip_encoding_cookie:
        return "".join(strip_encoding_cookie(decoded))
    return decoded.read()
Beispiel #19
0
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Converts a bytes string with python source code to unicode.

    Unicode strings are passed through unchanged; byte strings are decoded
    according to their PEP 263 encoding cookie ('ascii' when the cookie is
    malformed).  txt can be either a bytes buffer or a string containing
    the source code.
    """
    if isinstance(txt, unicode):
        return txt
    stream = BytesIO(txt) if isinstance(txt, bytes) else txt
    try:
        encoding, _ = detect_encoding(stream.readline)
    except SyntaxError:
        encoding = "ascii"
    stream.seek(0)
    reader = TextIOWrapper(stream, encoding, errors=errors,
                           line_buffering=True)
    reader.mode = 'r'
    if skip_encoding_cookie:
        return u"".join(strip_encoding_cookie(reader))
    return reader.read()
Beispiel #20
0
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Converts a bytes string with python source code to unicode.

    Unicode strings pass through untouched.  Byte strings are checked for
    the python source file encoding cookie to determine encoding; a
    malformed cookie falls back to 'ascii'.  txt may be a bytes buffer or
    a string.
    """
    if isinstance(txt, unicode_type):
        return txt
    if isinstance(txt, bytes):
        stream = BytesIO(txt)
    else:
        stream = txt
    try:
        detected, _ = detect_encoding(stream.readline)
    except SyntaxError:
        detected = "ascii"
    stream.seek(0)
    wrapped = TextIOWrapper(stream, detected, errors=errors,
                            line_buffering=True)
    wrapped.mode = 'r'
    if not skip_encoding_cookie:
        return wrapped.read()
    return u"".join(strip_encoding_cookie(wrapped))
Beispiel #21
0
    def execute(self):
        """Compile and run self.script in a fresh __main__-style namespace.

        Returns True on success, False when the script raised an ordinary
        exception (its traceback is printed).  SystemExit and
        KeyboardInterrupt are re-raised.
        """
        # Try to detect the declared encoding; default to utf-8 on failure.
        with open(self.script, 'rb') as stream:
            try:
                encoding = tokenize.detect_encoding(stream.readline)[0]
            except SyntaxError:
                encoding = "utf-8"

        # Set the global values for the module.
        global_values = {
            '__file__': self.script,       # Use actual filename of the script.
            '__name__': '__main__'         # Make sure that 'if __name__ == "__main__"'-hook works
        }

        with open(self.script, 'r', encoding=encoding) as stream:
            # compile(..., flags=False) as in the original.
            # NOTE(review): the 4th positional arg is 'flags', not
            # 'dont_inherit' - confirm the "no inherited __future__
            # imports" intent.
            module = compile(stream.read(), self.script, 'exec', False)

        try:
            exec(module, global_values)
        except (SystemExit, KeyboardInterrupt) as exc:
            # Re-raise exit requests untouched.
            raise exc
        except BaseException as exc:
            # Print the exception and signal failure.
            traceback.print_exception(exc.__class__, exc, exc.__traceback__)
            return False

        return True
Beispiel #22
0
 def encode(self, chars):
     """Encode *chars* for saving, honouring any declared source encoding.

     Falls back to UTF-8 with a BOM when the declared encoding is
     missing, invalid, or cannot represent the text (after telling the
     user via a message box).
     """
     if isinstance(chars, bytes):
         # This is either plain ASCII, or Tk was returning mixed-encoding
         # text to us. Don't try to guess further.
         return chars
     # Preserve a BOM that might have been present on opening
     if self.fileencoding == 'utf-8-sig':
         return chars.encode('utf-8-sig')
     # See whether there is anything non-ASCII in it.
     # If not, no need to figure out the encoding.
     try:
         return chars.encode('ascii')
     except UnicodeEncodeError:
         pass
     # Check if there is an encoding declared
     try:
         # ASCII-with-replacement is enough for detect_encoding to find a
         # PEP 263 cookie; then try to encode the real text with it.
         encoded = chars.encode('ascii', 'replace')
         enc, _ = tokenize.detect_encoding(io.BytesIO(encoded).readline)
         return chars.encode(enc)
     except SyntaxError as err:
         failed = str(err)
     except UnicodeEncodeError:
         # chars.encode(enc) failed: the declared encoding cannot
         # represent the text ('enc' is bound, since the ascii-replace
         # encode above cannot raise UnicodeEncodeError).
         failed = "Invalid encoding '%s'" % enc
     messagebox.showerror(
         "I/O Error",
         "%s.\nSaving as UTF-8" % failed,
         parent=self.text)
     # Fallback: save as UTF-8, with BOM - ignoring the incorrect
     # declared encoding
     return chars.encode('utf-8-sig')
Beispiel #23
0
def _readSourceCodeFromFilename3(source_filename):
    """Read a Python3 source file, decoding it per its declared encoding.

    On an encoding SyntaxError, when full compatibility is requested the
    exception's args/msg are rewritten to mirror CPython's own message
    shape before re-raising.
    """
    import tokenize

    try:
        with open(source_filename, "rb") as source_file:
            encoding = tokenize.detect_encoding(source_file.readline)[0]  # @UndefinedVariable

            # Rewind to get the whole file.
            source_file.seek(0)

            source_code = source_file.read()

        return source_code.decode(encoding)
    except SyntaxError as e:
        if Options.isFullCompat():
            if PythonVersions.doShowUnknownEncodingName():
                # Keep only the encoding name from the original message.
                match = re.match("unknown encoding for '.*?': (.*)", e.args[0])
                complaint = match.group(1)
            else:
                complaint = "with BOM"

            # Rewrite args to CPython's (message, (filename, 1, None, None)).
            e.args = (
                "encoding problem: %s" % complaint,
                (source_filename, 1, None, None)
            )

            if hasattr(e, "msg"):
                e.msg = e.args[0]

        raise
    def _LoadModule(self, name, fp, path, info, deferredImports,
            parent = None, namespace = False):
        """Load the module, given the information acquired by the finder.

        Handles source files, compiled bytecode (raw bytes or file
        objects) and C extensions, then scans the loaded code for
        imports.
        """
        suffix, mode, type = info
        if type == imp.PKG_DIRECTORY:
            return self._LoadPackage(name, path, parent, deferredImports,
                    namespace)
        module = self._AddModule(name, file_name=path, parent=parent)

        if type == imp.PY_SOURCE:
            logging.debug("Adding module [%s] [PY_SOURCE]", name)
            # Load & compile Python source code; close both handles when
            # done (the original leaked them).
            with open(path, "rb") as source_fp:
                encoding, lines = tokenize.detect_encoding(source_fp.readline)
            # "U" keeps universal-newline translation, as before.
            with open(path, "U", encoding = encoding) as source_fp:
                codeString = source_fp.read()
            # compile() requires source ending in a newline.
            if codeString and codeString[-1] != "\n":
                codeString = codeString + "\n"
            try:
                module.code = compile(codeString, path, "exec")
            except SyntaxError:
                raise ImportError("Invalid syntax in %s" % path)

        elif type == imp.PY_COMPILED:
            logging.debug("Adding module [%s] [PY_COMPILED]", name)
            # Load Python bytecode; fp is either raw bytes (zip import)
            # or a file object.
            if isinstance(fp, bytes):
                magic = fp[:4]
            else:
                magic = fp.read(4)
            if magic != imp.get_magic():
                raise ImportError("Bad magic number in %s" % path)
            # Skip the header bytes that follow the magic number.
            skip_bytes = 8
            if isinstance(fp, bytes):
                module.code = marshal.loads(fp[skip_bytes+4:])
                module.source_is_zip_file = True
            else:
                fp.read(skip_bytes)
                module.code = marshal.load(fp)

        elif type == imp.C_EXTENSION:
            logging.debug("Adding module [%s] [C_EXTENSION]", name)

        # If there's a custom hook for this module, run it.
        self._RunHook("load", module.name, module)

        if module.code is not None:
            if self.replace_paths:
                # Rewrite paths in code objects relative to the top-level
                # package.
                topLevelModule = module
                while topLevelModule.parent is not None:
                    topLevelModule = topLevelModule.parent
                module.code = self._ReplacePathsInCode(topLevelModule,
                        module.code)

            # Scan the module code for import statements
            self._ScanCode(module.code, module, deferredImports)

        module.in_import = False
        return module
Beispiel #25
0
	def _read_file(filename):
		"""Return the file's text (decoded per its PEP 263 coding marker)
		with two trailing newlines appended."""
		with open(filename, 'rb') as fp:
			encoding, _ = tokenize.detect_encoding(fp.readline)
		with open(filename, 'rt', encoding=encoding) as fp:
			return fp.read() + '\n\n'
def read_source_code(filename):
    """Read a Python source file and return its lines (keeping line ends),
    decoded per the file's encoding cookie with newlines normalised to \\n."""
    with open(filename, 'rb') as fp:
        encoding, consumed = tokenize.detect_encoding(fp.readline)
        # detect_encoding already consumed the first line(s); stitch them
        # back onto the rest of the stream.
        raw = b''.join(consumed) + fp.read()

    decoder = io.IncrementalNewlineDecoder(None, translate=True)
    return decoder.decode(raw.decode(encoding)).splitlines(True)
Beispiel #27
0
def read_pyfile(filename):
    """Read and return the contents of a Python source file (as a
    string), taking into account the file encoding."""
    # tokenize.open() performs the detect-encoding-then-reopen dance.
    with tokenize.open(filename) as source:
        return source.read()
Beispiel #28
0
def _stdin_get_value_py3():  # type: () -> str
    """Read stdin as bytes, decoded via its declared source encoding
    (falling back to utf-8 on any detection/decoding problem)."""
    raw = sys.stdin.buffer.read()
    try:
        declared, _ = tokenize.detect_encoding(io.BytesIO(raw).readline)
        return raw.decode(declared)
    except (LookupError, SyntaxError, UnicodeError):
        return raw.decode("utf-8")
Beispiel #29
0
def _stdin_get_value_py3():
    """Return stdin's contents wrapped in a StringIO, decoded using the
    declared source encoding (utf-8 on failure)."""
    raw = sys.stdin.buffer.read()
    try:
        declared = tokenize.detect_encoding(io.BytesIO(raw).readline)[0]
        return io.StringIO(raw.decode(declared))
    except (LookupError, SyntaxError, UnicodeError):
        return io.StringIO(raw.decode("utf-8"))
Beispiel #30
0
def _stdin_get_value_py3():
    """Wrap decoded stdin bytes in a StringIO.

    Decoding honours the PEP 263 encoding declared in the input; utf-8 is
    the fallback for unknown or invalid declarations.
    """
    buffered = sys.stdin.buffer.read()
    reader = io.BytesIO(buffered)
    try:
        declared = tokenize.detect_encoding(reader.readline)[0]
        return io.StringIO(buffered.decode(declared))
    except (LookupError, SyntaxError, UnicodeError):
        return io.StringIO(buffered.decode("utf-8"))
Beispiel #31
0
 def test_cookie_second_line_noncommented_first_line(self):
     """A cookie on line two is ignored when line one is real code."""
     source_lines = (b"print('\xc2\xa3')\n",
                     b'# vim: set fileencoding=iso8859-15 :\n',
                     b"print('\xe2\x82\xac')\n")
     encoding, consumed = detect_encoding(self.get_readline(source_lines))
     self.assertEqual(encoding, 'utf-8')
     # Only the first line is consumed: it already rules out a cookie.
     self.assertEqual(consumed, list(source_lines[:1]))
Beispiel #32
0
def read_pyfile(filename):
    """Read and return the contents of a Python source file (as a
    string), taking into account the file encoding."""
    # First pass over the raw bytes to find the declared encoding.
    with open(filename, "rb") as binary_file:
        file_encoding, _ = tokenize.detect_encoding(binary_file.readline)
    # Second pass: read the text with that encoding.
    with open(filename, "r", encoding=file_encoding) as text_file:
        return text_file.read()
Beispiel #33
0
    def _LoadModule(self, name, fp, path, info, deferredImports, parent=None):
        """Load the module, given the information acquired by the finder.

        Handles source files, compiled bytecode and C extensions, then
        scans the loaded code object for imports.
        """
        suffix, mode, type = info
        if type == imp.PKG_DIRECTORY:
            return self._LoadPackage(name, path, parent, deferredImports)
        module = self._AddModule(name, file_name=path, parent=parent)

        if type == imp.PY_SOURCE:
            logging.debug("Adding module [%s] [PY_SOURCE]", name)
            # Load & compile Python source code
            # if file opened, it already use good encoding; else detect it manually
            if not fp:
                with open(path, "rb") as f:
                    encoding = tokenize.detect_encoding(f.readline)[0]
                # Close our own handle when done (the original leaked it);
                # a caller-supplied fp stays the caller's responsibility.
                with open(path, "r", encoding=encoding) as f:
                    codeString = f.read()
            else:
                codeString = fp.read()
            # compile() requires source ending in a newline.
            if codeString and codeString[-1] != "\n":
                codeString = codeString + "\n"
            try:
                module.code = compile(codeString,
                                      path,
                                      "exec",
                                      optimize=self.optimizeFlag)
            except SyntaxError:
                raise ImportError("Invalid syntax in %s" % path)

        elif type == imp.PY_COMPILED:
            logging.debug("Adding module [%s] [PY_COMPILED]", name)
            # Load Python bytecode; raw bytes indicate a zip import.
            if isinstance(fp, bytes):
                fp = io.BytesIO(fp)
                module.source_is_zip_file = True
            module.code = pkgutil.read_code(fp)
            if module.code is None:
                raise ImportError("Bad magic number in %s" % path)

        elif type == imp.C_EXTENSION:
            logging.debug("Adding module [%s] [C_EXTENSION]", name)

        # If there's a custom hook for this module, run it.
        self._RunHook("load", module.name, module)

        if module.code is not None:
            if self.replace_paths:
                # Rewrite paths in code objects relative to the top-level
                # package.
                topLevelModule = module
                while topLevelModule.parent is not None:
                    topLevelModule = topLevelModule.parent
                module.code = self._ReplacePathsInCode(topLevelModule,
                                                       module.code)

            # Scan the module code for import statements
            self._ScanCode(module.code, module, deferredImports)

            # Verify __package__ in use
            self._ReplacePackageInCode(module)

        module.in_import = False
        return module
Beispiel #34
0
def check(file, depth):
    """Reindent *file*, recursing into directories.

    Returns True when the file was rewritten, False when unchanged, and
    None for skipped entries or errors.  Honours the module-level
    verbose/dryrun/makebackup/recurse/spec_newline flags.
    """
    # Don't descend into nested git checkouts below the top level.
    if depth > 1 and os.path.isfile(os.path.join(file, ".git")): return
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname)
                 and not os.path.islink(fullname)
                 and not os.path.split(fullname)[1].startswith("."))
                    or name.lower().endswith(".py")):
                check(fullname, depth + 1)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    # Detect the declared source encoding before reopening as text.
    with open(file, 'rb') as f:
        try:
            encoding, _ = tokenize.detect_encoding(f.readline)
        except SyntaxError as se:
            errprint("%s: SyntaxError: %s" % (file, str(se)))
            return
    try:
        # Close the handle when done; Reindenter consumes the stream in
        # its constructor.  (Also dropped a stray debug print(file) that
        # ignored the 'verbose' flag.)
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    newline = spec_newline if spec_newline else r.newlines
    if isinstance(newline, tuple):
        errprint(
            "%s: mixed newlines detected; cannot continue without --newline" %
            file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            with open(file, "w", encoding=encoding, newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
Beispiel #35
0
 def test_cookie_first_line_no_bom(self):
     """An emacs-style cookie on line one wins; latin-1 -> iso-8859-1."""
     source_lines = (
         b'# -*- coding: latin-1 -*-\n',
         b'print(something)\n',
         b'do_something(else)\n'
     )
     encoding, consumed = detect_encoding(self.get_readline(source_lines))
     self.assertEqual(encoding, 'iso-8859-1')
     # Only the cookie line is consumed.
     self.assertEqual(consumed, list(source_lines[:1]))
Beispiel #36
0
 def test_no_bom_no_encoding_cookie(self):
     """With neither a BOM nor a cookie, the default is utf-8."""
     source_lines = (
         b'# something\n',
         b'print(something)\n',
         b'do_something(else)\n'
     )
     encoding, consumed = detect_encoding(self.get_readline(source_lines))
     self.assertEqual(encoding, 'utf-8')
     # Both scanned lines are reported as consumed.
     self.assertEqual(consumed, list(source_lines[:2]))
Beispiel #37
0
def _detect_encoding(source: bytes) -> str:
    """

	:param bytes source:
	:type source:
	:return:
	:rtype:
	"""
    return tokenize.detect_encoding(io.BytesIO(source).readline)[0]
Beispiel #38
0
def _get_complexity(src_code):
    to_count = 'print'
    encoding = detect_encoding(
        (l.encode() for l in src_code.split(os_linesep, 1)).__next__)[0]

    with BytesIO(src_code.encode(encoding, 'ignore')) as src_stream:
        return dict(
            Counter(t[1] for t in tokenize(src_stream.readline)
                    if t[0] is t_name and t[1] == to_count))
Beispiel #39
0
 def test_matched_bom_and_cookie_first_line(self):
     """A UTF-8 BOM plus a matching first-line cookie yields 'utf-8-sig'."""
     sample = (
         b'\xef\xbb\xbf# coding=utf-8\n',
         b'print(something)\n',
         b'do_something(else)\n'
     )
     result, seen = detect_encoding(self.get_readline(sample))
     self.assertEqual(result, 'utf-8-sig')
     # The BOM is stripped from the consumed line.
     self.assertEqual(seen, [b'# coding=utf-8\n'])
Beispiel #40
0
 def test_no_bom_no_encoding_cookie(self):
     """Plain ASCII source without a cookie defaults to utf-8."""
     data = (
         b'# something\n',
         b'print(something)\n',
         b'do_something(else)\n'
     )
     detected, consumed = detect_encoding(self.get_readline(data))
     self.assertEqual(detected, 'utf-8')
     # The comment line plus one more line are read before giving up.
     self.assertEqual(consumed, list(data[:2]))
Beispiel #41
0
 def test_cookie_first_line_no_bom(self):
     """A lone latin-1 cookie on the first line maps to iso-8859-1."""
     payload = (
         b'# -*- coding: latin-1 -*-\n',
         b'print(something)\n',
         b'do_something(else)\n'
     )
     name, used = detect_encoding(self.get_readline(payload))
     self.assertEqual(name, 'iso-8859-1')
     self.assertEqual(used, [payload[0]])
Beispiel #42
0
def load_settings(filename, settings):
    """Execute the Python file ``filename`` with ``settings`` as its namespace.

    The file's PEP 263 encoding declaration is honoured; a malformed
    declaration falls back to utf-8.  NOTE: this executes arbitrary code
    from ``filename`` -- only use it on trusted configuration files.
    """
    try:
        with open(filename, 'rb') as raw:
            encoding = tokenize.detect_encoding(raw.readline)[0]
    except SyntaxError:
        # Bad or conflicting coding cookie: fall back to the default.
        encoding = 'utf-8'

    with open(filename, 'r', encoding=encoding) as src:
        code = compile(src.read(), filename, 'exec')
        exec(code, settings, settings)
Beispiel #43
0
 def open_source_file(filename):
     """Return the decoded text of *filename*, using its detected encoding.

     Raises ASTNGBuildingException when the file cannot be decoded with
     the detected (or default utf-8) encoding.

     NOTE(review): Python 2 only -- the 'bU'/'U' open modes and the
     ``except UnicodeError, uex`` syntax are SyntaxErrors on Python 3;
     the snippet also appears truncated (``data`` is never returned).
     """
     # First pass: scan the raw bytes for a BOM / PEP 263 coding cookie.
     byte_stream = open(filename, 'bU')
     encoding = detect_encoding(byte_stream.readline)[0]
     # Second pass: reopen as text with the detected encoding.
     stream = open(filename, 'U', encoding=encoding)
     try:
         data = stream.read()
     except UnicodeError, uex: # declared/detected encoding did not match
         # detect_encoding returns utf-8 if no encoding specified
         msg = 'Wrong (%s) or no encoding specified' % encoding
         raise ASTNGBuildingException(msg)
 def _read(filename):
     try:
         with open(filename, 'rb') as f:
             (encoding, _) = tokenize.detect_encoding(f.readline)
     except (LookupError, SyntaxError, UnicodeError):
         # Fall back if file encoding is improperly declared
         with open(filename, encoding='latin-1') as f:
             return f.readlines()
     with open(filename, 'r', encoding=encoding) as f:
         return f.readlines()
Beispiel #45
0
def _source_encoding_py3(source):
    """Return the PEP 263 encoding name for `source`.

    `source` is a byte string: the text of the program.
    """
    # iternext() adapts the line iterator into a readline-style callable.
    next_line = iternext(source.splitlines(True))
    encoding, _ = tokenize.detect_encoding(next_line)
    return encoding
    def test_short_files(self):
        """detect_encoding on truncated inputs: utf-8 default, and only
        the available line(s) are consumed.

        NOTE(review): modern tokenize reports 'utf-8-sig' when a BOM is
        present; these expectations match the historical behavior this
        suite targets -- confirm against the interpreter under test.
        """
        # assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual throughout.
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        # Completely empty input.
        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        # BOM followed by a single code line.
        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        # BOM only, no further content.
        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])
Beispiel #47
0
 def tokopen(filename):
     """Open *filename* read-only as text, decoded with the encoding
     reported by detect_encoding().
     """
     raw = io.open(filename, "rb")  # io.open kept for Python 2 compat
     encoding, _ = detect_encoding(raw.readline)
     # Rewind so the text wrapper re-reads (and strips) any BOM itself.
     raw.seek(0)
     wrapper = io.TextIOWrapper(raw, encoding, line_buffering=True)
     wrapper.mode = "r"
     return wrapper
Beispiel #48
0
def _source_encoding_py3(source):
    """Determine the encoding for `source` (the program text, as a byte
    string), following PEP 263.  Returns the encoding name as a string."""
    lines = source.splitlines(True)
    # iternext() turns the iterator into the readline callable that
    # tokenize.detect_encoding expects.
    return tokenize.detect_encoding(iternext(lines))[0]
 def open(filename):
     """Open *filename* read-only, decoding with the encoding found by
     detect_encoding().
     """
     raw = io.open(filename, 'rb')   # io.open for Python 2 compatibility
     found_encoding, _ = detect_encoding(raw.readline)
     raw.seek(0)  # let TextIOWrapper consume the BOM, if any
     wrapped = TextIOWrapper(raw, found_encoding, line_buffering=True)
     wrapped.mode = 'r'
     return wrapped
Beispiel #50
0
def load_setup():
    """Execute the project's setup.py as the ``setup`` module.

    Always (re)loads the file, even when it was imported before, because
    its setup() function is monkey-patched with a replacement and a
    cached import would not pick the patch up.
    """
    script = "setup.py"
    # Detect the PEP 263 encoding from the raw bytes first.
    with open(script, "rb") as raw:
        detected, _ = detect_encoding(raw.readline)
    with open(script, encoding=detected) as src:
        imp.load_module("setup", src, script, (".py", "r", imp.PY_SOURCE))
Beispiel #51
0
 def open_source_file(filename):
     """Read *filename* as text, honouring its encoding declaration.

     Raises AstroidBuildingException when the detected (or default
     utf-8) encoding cannot decode the file.

     NOTE(review): Python 2 only -- the 'bU'/'U' open modes and the
     ``except UnicodeError, uex`` syntax do not parse on Python 3; the
     snippet also appears truncated (``data`` is never returned).
     """
     # Pass 1: inspect the raw bytes for a BOM / PEP 263 cookie.
     byte_stream = open(filename, 'bU')
     encoding = detect_encoding(byte_stream.readline)[0]
     # Pass 2: reopen in text mode with that encoding.
     stream = open(filename, 'U', encoding=encoding)
     try:
         data = stream.read()
     except UnicodeError, uex:  # encoding did not match the file contents
         # detect_encoding returns utf-8 if no encoding specified
         msg = 'Wrong (%s) or no encoding specified' % encoding
         raise AstroidBuildingException(msg)
 def test_cookie_second_line_noncommented_first_line(self):
     """A cookie on line two is ignored when line one is real code."""
     src = (
         b"print('\xc2\xa3')\n",
         b'# vim: set fileencoding=iso8859-15 :\n',
         b"print('\xe2\x82\xac')\n"
     )
     enc, consumed = detect_encoding(self.get_readline(src))
     self.assertEqual(enc, 'utf-8')
     # Detection stops after the first (non-comment) line.
     self.assertEqual(consumed, [b"print('\xc2\xa3')\n"])
Beispiel #53
0
 def test_utf8_normalization(self):
     """Variant spellings of utf-8 all normalize to "utf-8".

     See get_normal_name() in tokenizer.c.
     """
     for base in ("utf-8", "utf-8-mac", "utf-8-unix"):
         for sep in ("-", "_"):
             spelling = base.replace("-", sep)
             source = (
                 b"#!/usr/bin/python\n",
                 b"# coding: " + spelling.encode("ascii") + b"\n",
                 b"1 + 3\n",
             )
             normalized, _ = detect_encoding(self.get_readline(source))
             self.assertEqual(normalized, "utf-8")
Beispiel #54
0
def load_setup():
    """Run setup.py as the ``setup`` module, unconditionally.

    The reload is deliberate: the script's setup() function is
    monkey-patched with a particular replacement, so a previously cached
    import must not be reused.
    """
    with open("setup.py", "rb") as byte_stream:
        encoding = detect_encoding(byte_stream.readline)[0]
    with open("setup.py", encoding=encoding) as text_stream:
        imp.load_module("setup", text_stream, "setup.py",
                        (".py", "r", imp.PY_SOURCE))
 def test_cookie_second_line_empty_first_line(self):
     """A cookie on line two is honoured when line one is blank."""
     src = (
         b'\n',
         b'# vim: set fileencoding=iso8859-15 :\n',
         b"print('\xe2\x82\xac')\n"
     )
     enc, consumed = detect_encoding(self.get_readline(src))
     self.assertEqual(enc, 'iso8859-15')
     # Both the blank line and the cookie line are consumed.
     self.assertEqual(consumed, list(src[:2]))
 def test_bom_no_cookie(self):
     """A UTF-8 BOM without a cookie is reported as plain utf-8."""
     source = (
         b'\xef\xbb\xbf# something\n',
         b'print(something)\n',
         b'do_something(else)\n'
     )
     enc, consumed = detect_encoding(self.get_readline(source))
     self.assertEqual(enc, 'utf-8')
     # The BOM itself is stripped from the first consumed line.
     self.assertEqual(consumed,
                      [b'# something\n', b'print(something)\n'])
Beispiel #57
0
def stdin_get_value() -> str:
    """Read all of stdin and return it decoded as text.

    Decoding honours the PEP 263 encoding declared in the input itself,
    falling back to utf-8 when detection fails.

    NOTE(review): despite the original "cache it" wording, no caching
    happens in this function -- presumably a caching wrapper is applied
    by the caller; confirm before relying on single-read semantics.
    """
    stdin_value = sys.stdin.buffer.read()
    fd = io.BytesIO(stdin_value)
    try:
        # detect_encoding consumes the first line(s); rewind before decoding.
        coding, _ = tokenize.detect_encoding(fd.readline)
        fd.seek(0)
        return io.TextIOWrapper(fd, coding).read()
    except (LookupError, SyntaxError, UnicodeError):
        # Unknown or malformed declaration: decode the raw bytes as utf-8.
        return stdin_value.decode("utf-8")
Beispiel #58
0
 def open(filename):
     """Read-only text stream for *filename*, using the encoding detected
     by detect_encoding().
     """
     stream = io.open(filename, 'rb')  # io.open: works on Python 2 as well
     encoding = detect_encoding(stream.readline)[0]
     stream.seek(0)  # rewind so TextIOWrapper skips the BOM on its own
     result = TextIOWrapper(stream, encoding, line_buffering=True)
     result.mode = 'r'
     return result
Beispiel #59
0
 def _LoadModule(self, name, fp, path, info, deferredImports,
         parent = None, namespace = False):
     """Load the module, given the information acquired by the finder.

     ``info`` is an imp-style (suffix, mode, type) triple.  Packages are
     delegated to _LoadPackage; source and bytecode modules get their
     code object compiled/unmarshalled and scanned for imports.
     """
     suffix, mode, type = info
     # Packages are handled by their own loader.
     if type == imp.PKG_DIRECTORY:
         return self._LoadPackage(name, path, parent, deferredImports,
                 namespace)
     module = self._AddModule(name)
     module.file = path
     module.parent = parent

     if type == imp.PY_SOURCE:
         # Load & compile Python source code
         if sys.version_info[0] >= 3:
             # For Python 3, read the file with the correct encoding
             import tokenize
             fp = open(path, "rb")
             encoding, lines = tokenize.detect_encoding(fp.readline)
             # NOTE(review): the "U" open mode was removed in Python
             # 3.11 -- confirm against the supported interpreter range.
             fp = open(path, "U", encoding = encoding)
         codeString = fp.read()
         # compile() requires source to end with a newline.
         if codeString and codeString[-1] != "\n":
             codeString = codeString + "\n"
         module.code = compile(codeString, path, "exec")

     elif type == imp.PY_COMPILED:
         # Load Python bytecode
         if isinstance(fp, str):
             # Contents were already read in (e.g. from a zip archive).
             magic = fp[:4]
         else:
             magic = fp.read(4)
         if magic != imp.get_magic():
             raise ImportError("Bad magic number in %s" % path)
         if isinstance(fp, str):
             # Skip the 8-byte pyc header before the marshalled code.
             module.code = marshal.loads(fp[8:])
             module.inZipFile = True
         else:
             fp.read(4)  # skip the remaining pyc header field(s)
             module.code = marshal.load(fp)

     # If there's a custom hook for this module, run it.
     self._RunHook("load", module.name, module)

     if module.code is not None:
         if self.replacePaths:
             # Rewrite co_filename entries relative to the top-level module.
             topLevelModule = module
             while topLevelModule.parent is not None:
                 topLevelModule = topLevelModule.parent
             module.code = self._ReplacePathsInCode(topLevelModule,
                     module.code)

         # Scan the module code for import statements
         self._ScanCode(module.code, module, deferredImports)

     module.inImport = False
     return module
Beispiel #60
0
def decode_source(source_bytes):
    # copied from _bootstrap_external.py
    """Return ``source_bytes`` decoded to a string, per its PEP 263
    encoding, with universal-newline translation applied.
    """
    import _io
    import tokenize  # To avoid bootstrap issues.
    readline = _io.BytesIO(source_bytes).readline
    encoding_name, _ = tokenize.detect_encoding(readline)
    text = source_bytes.decode(encoding_name)
    # IncrementalNewlineDecoder(None, True) folds \r and \r\n into \n.
    return _io.IncrementalNewlineDecoder(None, True).decode(text)