Example No. 1
def FileEncoding(filename):
    """Return the file's encoding."""
    try:
        with open(filename, 'rb') as fd:
            return tokenize.detect_encoding(fd.readline)[0]
    except IOError:
        raise
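For context on the API every example here wraps: a minimal standalone sketch (the sample source bytes are made up for illustration) of what the standard library's tokenize.detect_encoding returns, namely an (encoding, lines) pair whose first element is the normalized encoding name.

# Standalone sketch of tokenize.detect_encoding; the sample bytes are illustrative.
import io
import tokenize

source = b'# -*- coding: latin-1 -*-\nx = 1\n'
encoding, consumed = tokenize.detect_encoding(io.BytesIO(source).readline)
print(encoding)  # 'iso-8859-1' (the normalized name for the latin-1 cookie)
print(consumed)  # the raw line(s) read while detecting, as bytes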
Example No. 2
def ReadFile(filename, logger=None):
    """Read the contents of the file.

  An optional logger can be specified to emit messages to your favorite logging
  stream. If specified, the error is logged before the exception is re-raised.
  This is external so that it can be used by third-party applications.

  Arguments:
    filename: (unicode) The name of the file.
    logger: (function) A function or lambda that takes a string and emits it.

  Returns:
    The contents of filename.

  Raises:
    IOError: raised if there was an error reading the file.
  """
    try:
        with open(filename, 'rb') as fd:
            encoding = tokenize.detect_encoding(fd.readline)[0]
    except IOError as err:
        if logger:
            logger(err)
        raise

    try:
        with py3compat.open_with_encoding(filename,
                                          mode='r',
                                          encoding=encoding) as fd:
            source = fd.read()
        return source, encoding
    except IOError as err:
        if logger:
            logger(err)
        raise
Example No. 3
def IsFortranOrHeaderFile(filename, headers_too=True):
  """Return True if filename is a Fortran file."""
  if headers_too:
    # TODO: This can be dangerous, especially when the '.h' file is actually a C header.
    if os.path.splitext(filename)[1] in ['.F', '.F90', '.f', '.f90', '.h']:
      return True
  elif os.path.splitext(filename)[1] in ['.F', '.F90', '.f', '.f90']:
    return True

  try:
    with open(filename, 'rb') as fd:
      encoding = tokenize.detect_encoding(fd.readline)[0]

    # Check for correctness of encoding.
    with py3compat.open_with_encoding(filename, encoding=encoding) as fd:
      fd.read()
  except UnicodeDecodeError:
    encoding = 'latin-1'
  except (IOError, SyntaxError):
    # If we fail to detect encoding (or the encoding cookie is incorrect - which
    # will make detect_encoding raise SyntaxError), assume it's not a Fortran
    # file.
    return False

  try:
    with py3compat.open_with_encoding(filename,
                                      mode='r',
                                      encoding=encoding) as fd:
      fd.readlines()[0]  # Ensure the file has at least one readable line.
  except (IOError, IndexError):
    return False

  # Unlike Python files, Fortran sources have no shebang line to inspect, so at
  # this point assume the file is not a Fortran file.
  return False
Example No. 4
def IsPythonFile(filename):
    """Return True if filename is a Python file."""
    if os.path.splitext(filename)[1] == '.py':
        return True

    try:
        with open(filename, 'rb') as fd:
            encoding = tokenize.detect_encoding(fd.readline)[0]

        # Check for correctness of encoding.
        with py3compat.open_with_encoding(filename,
                                          mode='r',
                                          encoding=encoding) as fd:
            fd.read()
    except UnicodeDecodeError:
        encoding = 'latin-1'
    except (IOError, SyntaxError):
        # If we fail to detect encoding (or the encoding cookie is incorrect - which
        # will make detect_encoding raise SyntaxError), assume it's not a Python
        # file.
        return False

    try:
        with py3compat.open_with_encoding(filename,
                                          mode='r',
                                          encoding=encoding) as fd:
            first_line = fd.readlines()[0]
    except (IOError, IndexError):
        return False

    return re.match(r'^#!.*\bpython[23]?\b', first_line)
Example No. 5
def IsPythonFile(filename):
  """Return True if filename is a Python file."""
  if os.path.splitext(filename)[1] == '.py':
    return True

  try:
    with open(filename, 'rb') as fd:
      encoding = tokenize.detect_encoding(fd.readline)[0]

    # Check for correctness of encoding.
    with py3compat.open_with_encoding(filename, encoding=encoding) as fd:
      fd.read()
  except UnicodeDecodeError:
    encoding = 'latin-1'
  except (IOError, SyntaxError):
    # If we fail to detect encoding (or the encoding cookie is incorrect - which
    # will make detect_encoding raise SyntaxError), assume it's not a Python
    # file.
    return False

  try:
    with py3compat.open_with_encoding(filename,
                                      mode='r',
                                      encoding=encoding) as fd:
      first_line = fd.readlines()[0]
  except (IOError, IndexError):
    return False

  return re.match(r'^#!.*\bpython[23]?\b', first_line)
Example No. 6
def ReadFile(filename, logger=None):
  """Read the contents of the file.

  An optional logger can be specified to emit messages to your favorite logging
  stream. If specified, the error is logged before the exception is re-raised.
  This is external so that it can be used by third-party applications.

  Arguments:
    filename: (unicode) The name of the file.
    logger: (function) A function or lambda that takes a string and emits it.

  Returns:
    The contents of filename.

  Raises:
    IOError: raised if there was an error reading the file.
  """
  try:
    with open(filename, 'rb') as fd:
      encoding = tokenize.detect_encoding(fd.readline)[0]
  except IOError as err:
    if logger:
      logger(err)
    raise

  try:
    with py3compat.open_with_encoding(filename, mode='r',
                                      encoding=encoding) as fd:
      source = fd.read()
    return source, encoding
  except IOError as err:
    if logger:
      logger(err)
    raise
Example No. 7
def IsPythonFile(filename):
    """Return True if filename is a Python file."""
    if os.path.splitext(filename)[1] == '.py':
        return True

    try:
        with open(filename, 'rb') as fd:
            encoding = tokenize.detect_encoding(fd.readline)[0]

        # Check for correctness of encoding.
        with py3compat.open_with_encoding(filename, encoding=encoding) as fd:
            fd.read()
    except UnicodeDecodeError:
        encoding = 'latin-1'
    except IOError:
        return False

    try:
        with py3compat.open_with_encoding(filename,
                                          mode='r',
                                          encoding=encoding) as fd:
            first_line = fd.readlines()[0]
    except (IOError, IndexError):
        return False

    return re.match(r'^#!.*\bpython[23]?\b', first_line)
Example No. 8
def _detect_encoding(readline):
    """Return file encoding."""
    try:
        from lib2to3.pgen2 import tokenize as lib2to3_tokenize
        encoding = lib2to3_tokenize.detect_encoding(readline)[0]
        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'
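A hedged usage sketch of the _detect_encoding helper above, feeding it a readline from an in-memory buffer; note that lib2to3 is deprecated (and removed in recent CPython releases), so this only runs where lib2to3.pgen2 is still importable.

# Hypothetical usage of _detect_encoding with an in-memory buffer.
import io

data = b'# coding: utf-8\nprint("hi")\n'
print(_detect_encoding(io.BytesIO(data).readline))  # 'utf-8'
# An unrecognized encoding cookie makes detect_encoding raise, so the helper
# falls back to 'latin-1'.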
Example No. 9
    def openpy(filename):
        from lib2to3.pgen2.tokenize import detect_encoding
        import io

        # The following is copied from tokenize.py in Python 3.2,
        # Copyright (c) 2001-2014 Python Software Foundation; All Rights Reserved
        buffer = io.open(filename, 'rb')
        encoding, lines = detect_encoding(buffer.readline)
        buffer.seek(0)
        text = io.TextIOWrapper(buffer, encoding, line_buffering=True)
        text.mode = 'r'
        return text
Example No. 10
 def test_all_project_files(self):
     for filepath in support.all_project_files():
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
         self.assertTrue(encoding is not None,
                         "can't detect encoding for %s" % filepath)
         with open(filepath, "r") as fp:
             source = fp.read()
             source = source.decode(encoding)
         tree = driver.parse_string(source)
         new = unicode(tree)
         if diff(filepath, new, encoding):
             self.fail("Idempotency failed: %s" % filepath)
Example No. 11
def get_module_import_alias(import_name, text):
    try:
        text = text.encode(detect_encoding(BytesIO(text.encode()).readline)[0])
    except UnicodeEncodeError:  # Script contains a non-ASCII symbol; cannot run detect_encoding because the default encode() requires ASCII.
        text = text.encode('utf-8')
    try:
        tree = ast.parse(text)
    except SyntaxError:  # Script contains syntax errors, so the text cannot be parsed.
        return import_name
    for node in ast.walk(tree):
        if isinstance(node, ast.alias) and node.name == import_name:
            return node.asname
    return import_name
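A hedged usage sketch of get_module_import_alias as defined above; the sample script string is made up, and ast, io.BytesIO and tokenize.detect_encoding are assumed to be imported as the function expects.

# Hypothetical usage of get_module_import_alias; the script text is illustrative.
script = "import numpy as np\nimport os\n"
print(get_module_import_alias('numpy', script))  # 'np'
print(get_module_import_alias('os', script))     # None  (imported without an alias)
print(get_module_import_alias('sys', script))    # 'sys' (not imported; name returned unchanged)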
Example No. 12
def detect_encoding(filename: str) -> str:
    """Return file encoding."""
    from lib2to3.pgen2 import tokenize as lib2to3_tokenize
    mode = "rb"
    try:
        with open(filename, mode=mode) as input_file:
            encoding: str = lib2to3_tokenize.detect_encoding(input_file.readline)[0]  # type: ignore
            # Check for correctness of encoding.
            with open_with_encoding(filename, encoding, mode=mode) as input_file:
                input_file.read()

        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        return "latin-1"
Example No. 13
def detect_encoding(filename):
    """Return file encoding."""
    try:
        with open(filename, 'rb') as input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

            # Check for correctness of encoding.
            with open_with_encoding(filename, encoding) as input_file:
                input_file.read()

        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        return 'latin-1'
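A hedged usage sketch of the detect_encoding helper above with a throwaway file; open_with_encoding is assumed to be the companion helper the snippet relies on (not defined here), and lib2to3 must still be importable.

# Hypothetical usage of detect_encoding with a temporary file.
import tempfile

with tempfile.NamedTemporaryFile('wb', suffix='.py', delete=False) as tmp:
    tmp.write(b'# -*- coding: utf-8 -*-\nprint("ok")\n')

print(detect_encoding(tmp.name))  # 'utf-8'
# If the cookie names an encoding the file's bytes do not actually satisfy,
# the validation read fails and the helper returns 'latin-1' instead.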
Example No. 14
 def open_with_encoding_check(filename):  # type: ignore
     """Open a file in read only mode using the encoding detected by
     detect_encoding().
     """
     fp = io.open(filename, 'rb')
     try:
         encoding, lines = detect_encoding(fp.readline)
         fp.seek(0)
         text = io.TextIOWrapper(fp, encoding, line_buffering=True)
         text.mode = 'r'
         return text
     except:
         fp.close()
         raise
Example No. 15
 def test_all_project_files(self):
     for filepath in support.all_project_files():
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
             fp.seek(0)
             source = fp.read()
             if encoding:
                 source = source.decode(encoding)
         tree = driver.parse_string(source)
         new = str(tree)
         if encoding:
             new = new.encode(encoding)
         if diff(filepath, new):
             self.fail("Idempotency failed: %s" % filepath)
Example No. 16
 def test_all_project_files(self):
     for filepath in support.all_project_files():
         print("Parsing %s..." % filepath)
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
             fp.seek(0)
             source = fp.read()
             if encoding:
                 source = source.decode(encoding)
         tree = driver.parse_string(source)
         new = str(tree)
         if encoding:
             new = new.encode(encoding)
         if diff(filepath, new):
             self.fail("Idempotency failed: %s" % filepath)
Example No. 17
 def test_all_project_files(self):
     if sys.platform.startswith("win"):
         # XXX something with newlines goes wrong on Windows.
         return
     for filepath in support.all_project_files():
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
         self.assertTrue(encoding is not None,
                         "can't detect encoding for %s" % filepath)
         with io.open(filepath, "r", encoding=encoding) as fp:
             source = fp.read()
         tree = driver.parse_string(source)
         new = unicode(tree)
         if diff(filepath, new, encoding):
             self.fail("Idempotency failed: %s" % filepath)
Example No. 18
 def _read_python_source(self, filename):
     """
     Do our best to decode a Python source file correctly.
     """
     try:
         f = open(filename, "rb")
     except OSError as err:
         self.log_error("Can't open %s: %s", filename, err)
         return None, None
     try:
         encoding = tokenize.detect_encoding(f.readline)[0]
     finally:
         f.close()
     with _open_with_encoding(filename, "r", encoding=encoding) as f:
         return _from_system_newlines(f.read()), encoding
Example No. 19
 def test_all_project_files(self):
     if sys.platform.startswith("win"):
         # XXX something with newlines goes wrong on Windows.
         return
     for filepath in support.all_project_files():
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
         self.assertIsNotNone(encoding, "can't detect encoding for %s" % filepath)
         with open(filepath, "r") as fp:
             source = fp.read()
             source = source.decode(encoding)
         tree = driver.parse_string(source)
         new = unicode(tree)
         if diff(filepath, new, encoding):
             self.fail("Idempotency failed: %s" % filepath)
Example No. 20
 def test_all_project_files(self):
     for filepath in support.all_project_files():
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
         self.assertTrue(encoding is not None,
                         "can't detect encoding for %s" % filepath)
         with open(filepath, "r", encoding=encoding) as fp:
             source = fp.read()
         try:
             tree = driver.parse_string(source)
         except ParseError as err:
             print('ParseError on file', filepath, err)
             continue
         new = str(tree)
         if diff(filepath, new):
             self.fail("Idempotency failed: %s" % filepath)
Example No. 21
 def test_all_project_files(self):
     for filepath in support.all_project_files():
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
         self.assertIsNotNone(encoding,
                              "can't detect encoding for %s" % filepath)
         with open(filepath, "r") as fp:
             source = fp.read()
             source = source.decode(encoding)
         tree = driver.parse_string(source)
         new = unicode(tree)
         diffResult = diff(filepath, new, encoding)
         if diffResult:
             self.fail(
                 "Idempotency failed: {} using {} encoding\n{}".format(
                     filepath, encoding, diffResult))
Example No. 22
 def test_all_project_files(self):
     for filepath in support.all_project_files():
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
         self.assertIsNotNone(encoding, "can't detect encoding for %s" % filepath)
         with open(filepath, "r", encoding=encoding) as fp:
             source = fp.read()
         try:
             tree = driver.parse_string(source)
         except ParseError as err:
             if verbose > 0:
                 warnings.warn("ParseError on file %s (%s)" % (filepath, err))
             continue
         new = str(tree)
         x = diff(filepath, new)
         if x:
             self.fail("Idempotency failed: %s" % filepath)
Example No. 23
 def parse_string(self, code_str):
     """Parse a program string and remove unwanted outer levels in AST."""
     # see lib2to3.tests.support.parse_string -- but we don't do the dedent
     # (support.reformat)
     if not isinstance(code_str, str):
         encoding, _ = tokenize2to3.detect_encoding(
             io.BytesIO(code_str).readline)
         code_str = str(code_str, encoding)
     features = refactor._detect_future_features(code_str)  # pylint: disable=protected-access
     if "print_function" in features:
         driver = self._drivers["no_print_statement"]
     else:
         driver = self._drivers["print_statement"]
     code_ast = driver.parse_string(code_str + "\n\n", debug=False)
     if code_ast:
         code_ast.parent = None
     return code_ast
Example No. 24
 def test_all_project_files(self):
     for filepath in support.all_project_files():
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
         self.assertIsNotNone(encoding,
                              "can't detect encoding for %s" % filepath)
         with open(filepath, "r", encoding=encoding) as fp:
             source = fp.read()
         try:
             tree = driver.parse_string(source)
         except ParseError as err:
             if verbose > 0:
                 warnings.warn('ParseError on file %s (%s)' % (filepath, err))
             continue
         new = str(tree)
         x = diff(filepath, new)
         if x:
             self.fail("Idempotency failed: %s" % filepath)
Example No. 25
 def test_all_project_files(self):
     for filepath in support.all_project_files():
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
         self.assertIsNotNone(encoding,
                              "can't detect encoding for %s" % filepath)
         with open(filepath, "r", encoding=encoding) as fp:
             source = fp.read()
         try:
             tree = driver.parse_string(source)
         except ParseError:
             try:
                 tree = driver_no_print_statement.parse_string(source)
             except ParseError as err:
                 self.fail('ParseError on file %s (%s)' % (filepath, err))
         new = str(tree)
         if new != source:
             print(diff_texts(source, new, filepath))
             self.fail("Idempotency failed: %s" % filepath)
Example No. 26
    def test_all_project_files(self):
        if sys.platform.startswith("win"):
            # XXX something with newlines goes wrong on Windows.
            return
        for filepath in support.all_project_files():
            with open(filepath, "rb") as fp:
                encoding = tokenize.detect_encoding(fp.readline)[0]
            self.assertTrue(encoding is not None, "can't detect encoding for %s" % filepath)
            with open(filepath, "r") as fp:
                source = fp.read()
                source = source.decode(encoding)
            tree = driver.parse_string(source)

            from test import test_support

            if test_support.due_to_ironpython_bug("http://ironpython.codeplex.com/workitem/28171"):
                continue
            new = unicode(tree)
            if diff(filepath, new, encoding):
                self.fail("Idempotency failed: %s" % filepath)
Example No. 27
 def test_all_project_files(self):
     if sys.platform.startswith("win"):
         # XXX something with newlines goes wrong on Windows.
         return
     for filepath in support.all_project_files():
         with open(filepath, "rb") as fp:
             encoding = tokenize.detect_encoding(fp.readline)[0]
         self.assertTrue(encoding is not None,
                         "can't detect encoding for %s" % filepath)
         with open(filepath, "r") as fp:
             source = fp.read()
             source = source.decode(encoding)
         tree = driver.parse_string(source)
         
         from test import test_support
         if test_support.due_to_ironpython_bug("http://ironpython.codeplex.com/workitem/28171"):
             continue
         new = unicode(tree)
         if diff(filepath, new, encoding):
             self.fail("Idempotency failed: %s" % filepath)
Example No. 28
def _get_imported_from_future(code_str):
    """
    Parse the given code and return a list of names that are imported
    from __future__.
    :param code_str: The code to parse
    :return list: List of names that are imported from __future__
    """
    future_imports = []
    try:
        code_str = code_str.encode(
            detect_encoding(BytesIO(code_str.encode()).readline)[0])
    except UnicodeEncodeError:  # Script contains a non-ASCII symbol; cannot run detect_encoding because the default encode() requires ASCII.
        code_str = code_str.encode('utf-8')
    for node in ast.walk(ast.parse(code_str)):
        if isinstance(node, ast.ImportFrom):
            if node.module == '__future__':
                future_imports.extend(
                    [import_alias.name for import_alias in node.names])
                break
    return future_imports
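A hedged usage sketch of _get_imported_from_future as defined above; the sample code string is made up, and ast, BytesIO and detect_encoding are assumed to be imported as the function expects.

# Hypothetical usage of _get_imported_from_future; the code string is illustrative.
code = "from __future__ import annotations, division\nx = 1\n"
print(_get_imported_from_future(code))       # ['annotations', 'division']
print(_get_imported_from_future("x = 1\n"))  # [] (no __future__ imports)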
Example No. 29
def ReadFile(filename, logger=None):
    """Read the contents of the file.

  An optional logger can be specified to emit messages to your favorite logging
  stream. If specified, the error is logged before the exception is re-raised.
  This is external so that it can be used by third-party applications.

  Arguments:
    filename: (unicode) The name of the file.
    logger: (function) A function or lambda that takes a string and emits it.

  Returns:
    The contents of filename.

  Raises:
    IOError: raised if there was an error reading the file.
  """
    try:
        with open(filename, 'rb') as fd:
            encoding = tokenize.detect_encoding(fd.readline)[0]
    except IOError as err:
        if logger:
            logger(err)
        raise

    try:
        # Preserves line endings.
        with py3compat.open_with_encoding(filename,
                                          mode='r',
                                          encoding=encoding,
                                          newline='') as fd:
            lines = fd.readlines()

        line_ending = file_resources.LineEnding(lines)
        source = '\n'.join(line.rstrip('\r\n') for line in lines) + '\n'
        return source, line_ending, encoding
    except IOError as err:  # pragma: no cover
        if logger:
            logger(err)
        raise
Example No. 30
def detect_encoding(filename):
    """Return file encoding."""

    try:
        input_file = open(filename, 'rb')
    except (IOError, OSError):
        # If the file doesn't exist, return the same thing
        # detect_encoding gives us for an empty file, utf-8.
        return 'utf-8'

    try:
        with input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

            # Check for correctness of encoding.
            with open_with_encoding(filename, encoding) as input_file:
                input_file.read()

        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        return 'latin-1'
Example No. 31
def ReadFile(filename, logger=None):
  """Read the contents of the file.

  An optional logger can be specified to emit messages to your favorite logging
  stream. If specified, the error is logged before the exception is re-raised.
  This is external so that it can be used by third-party applications.

  Arguments:
    filename: (unicode) The name of the file.
    logger: (function) A function or lambda that takes a string and emits it.

  Returns:
    The contents of filename.

  Raises:
    IOError: raised if there was an error reading the file.
  """
  try:
    with open(filename, 'rb') as fd:
      encoding = tokenize.detect_encoding(fd.readline)[0]
  except IOError as err:
    if logger:
      logger(err)
    raise

  try:
    # Preserves line endings.
    with py3compat.open_with_encoding(
        filename, mode='r', encoding=encoding, newline='') as fd:
      lines = fd.readlines()

    line_ending = file_resources.LineEnding(lines)
    source = '\n'.join(line.rstrip('\r\n') for line in lines) + '\n'
    return source, line_ending, encoding
  except IOError as err:  # pragma: no cover
    if logger:
      logger(err)
    raise
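The newline='' mode preserves the original line terminators so that LineEnding can inspect them before the source is normalized to '\n'. Below is a standalone sketch of that normalization step, with a simple CRLF check standing in for yapf's file_resources.LineEnding.

# Standalone sketch of the line-ending normalization above; the CRLF check is a
# simplified stand-in, not yapf's file_resources.LineEnding implementation.
lines = ['first\r\n', 'second\r\n']
line_ending = '\r\n' if lines and lines[0].endswith('\r\n') else '\n'
source = '\n'.join(line.rstrip('\r\n') for line in lines) + '\n'
print(repr(line_ending), repr(source))  # '\r\n' 'first\nsecond\n'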
Example No. 32
def read_file_using_source_encoding(filename):
    with open(filename, 'rb') as infile:
        encoding = tokenize.detect_encoding(infile.readline)[0]
    with io.open(filename, 'r', encoding=encoding) as infile_with_encoding:
        return infile_with_encoding.read()
Example No. 33
def decode_string_using_source_encoding(b):
    encoding = tokenize.detect_encoding(io.BytesIO(b).readline)[0]
    return b.decode(encoding)
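A hedged usage sketch of decode_string_using_source_encoding; the byte string below is made up, and io and tokenize are assumed to be imported as the function expects.

# Hypothetical usage: bytes carrying a latin-1 coding cookie are decoded with
# the cookie's encoding rather than a UTF-8 default.
raw = '# -*- coding: latin-1 -*-\ns = "café"\n'.encode('latin-1')
print(decode_string_using_source_encoding(raw))
# Prints the source text with the accented character intact.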
Example No. 34
def FileEncoding(filename):
  """Return the file's encoding."""
  with open(filename, 'rb') as fd:
    return tokenize.detect_encoding(fd.readline)[0]
Example No. 35
def detect_encoding(pyFile):
	from lib2to3.pgen2 import tokenize
	with open(pyFile, 'rb') as f:
		return tokenize.detect_encoding(f.readline)[0]
Example No. 36
def make_file_from_contents(path: str, contents_bytes: bytes) -> File:
    """Wrapper for File constructor.

    Computes the line offsets and creates a `File` object from
    `contents_bytes`. (`path` and `encoding` are passed through to the
    `File` object.)
    """
    # pylint: disable=too-many-locals
    with io.BytesIO(contents_bytes) as src_f:
        try:
            encoding, _ = tokenize.detect_encoding(src_f.readline)  # type: ignore
        except LookupError as exc:
            # TODO: The first arg of UnicodeDecodeError is the encoding, but we
            #       don't know it here, so this is an inappropriate error to
            #       raise.
            raise UnicodeDecodeError('???', contents_bytes, 0, 1, str(exc))
        if encoding == 'utf8-sig':  # TODO: see https://bugs.python.org/issue39155
            encoding = 'utf-8-sig'
        decoder = codecs.getincrementaldecoder(encoding)()
        chr_to_byte_offset: Dict[int, int] = {}
        chr_offset = 0
        last_byte_offset = 0
        contents_list = []
        for byte_offset, by in enumerate(contents_bytes):
            # TODO: benchmark other methods of converting an int to a byte:
            #     by.to_bytes(1, sys.byteorder, signed=False))
            #     struct.unpack('1c', by))[0]
            #     chr(by).encode('latin1')
            #   (probably these are all dwarfed by the time used
            #   to process the AST)
            ch = decoder.decode(bytes([by]))  # Can raise UnicodeDecodeError.
            if ch:
                contents_list.append(ch)
                assert chr_offset not in chr_to_byte_offset
                chr_to_byte_offset[chr_offset] = last_byte_offset
                chr_offset += 1
                last_byte_offset = byte_offset + 1
        final_by = decoder.decode(b'', True)  # flush
        assert final_by == '', final_by
        contents_str = ''.join(contents_list)
        # The AST uses [start, end), so we also need the last+1 offset:
        assert chr_offset not in chr_to_byte_offset
        chr_to_byte_offset[chr_offset] = last_byte_offset

    lineno_to_chr_offset = {1: 0}
    lineno = 1
    for offset, char in enumerate(contents_str):
        # TODO: make this work with Windows '\r\n', Mac '\r'
        #       e.g., use contents_str.splitlines(keepends=True)
        #       (see code in ast_color.ColorFile._color_whitespace).
        if char == '\n':
            lineno += 1
            lineno_to_chr_offset[lineno] = offset + 1
    byte_to_chr_offset = {v: k for k, v in chr_to_byte_offset.items()}
    assert len(byte_to_chr_offset) == len(chr_to_byte_offset)  # no dup k,v / v,k
    return File(path=path,
                contents_bytes=contents_bytes,
                contents_str=contents_str,
                encoding=encoding,
                lineno_to_chr_offset=lineno_to_chr_offset,
                chr_to_byte_offset=chr_to_byte_offset,
                byte_to_chr_offset=byte_to_chr_offset,
                chr_offsets_for_linenos=sorted((offset, lineno)
                                               for lineno, offset in lineno_to_chr_offset.items()),
                numlines=lineno - 1)
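The core idea of the loop above, feeding an incremental decoder one byte at a time to build a character-offset-to-byte-offset map, shown in isolation; this is a standalone sketch, not the File API used above.

# Standalone sketch of the byte-to-character offset mapping technique above.
import codecs

data = 'é = 1\n'.encode('utf-8')            # 'é' occupies two bytes in UTF-8
decoder = codecs.getincrementaldecoder('utf-8')()
chr_to_byte_offset = {}
chr_offset, last_byte_offset = 0, 0
for byte_offset, by in enumerate(data):
    if decoder.decode(bytes([by])):          # a complete character was produced
        chr_to_byte_offset[chr_offset] = last_byte_offset
        chr_offset += 1
        last_byte_offset = byte_offset + 1
chr_to_byte_offset[chr_offset] = last_byte_offset  # one-past-the-end offset
print(chr_to_byte_offset)  # {0: 0, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7}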
Example No. 37
def read_file_using_source_encoding(filename):
    with open(filename, 'rb') as infile:
        encoding = tokenize.detect_encoding(infile.readline)[0]
    with io.open(filename, 'r', encoding=encoding) as infile:
        return infile.read()
Example No. 38
def FileEncoding(filename):
    """Return the file's encoding."""
    with open(filename, 'rb') as fd:
        return tokenize.detect_encoding(fd.readline)[0]