def FileEncoding(filename):
    """Return the file's encoding.

    Arguments:
      filename: (unicode) The name of the file.

    Returns:
      The encoding name (e.g. 'utf-8') determined from the PEP 263 coding
      cookie or BOM; defaults to 'utf-8'.

    Raises:
      IOError: if the file cannot be opened or read.
    """
    # The original wrapped this in `try: ... except IOError: raise`, which is
    # a no-op — the exception propagates either way — so the handler is gone.
    with open(filename, 'rb') as fd:
        return tokenize.detect_encoding(fd.readline)[0]
def ReadFile(filename, logger=None):
    """Read the contents of the file.

    An optional logger can be specified to emit messages to your favorite
    logging stream. If specified, then no exception is raised. This is
    external so that it can be used by third-party applications.

    Arguments:
      filename: (unicode) The name of the file.
      logger: (function) A function or lambda that takes a string and emits it.

    Returns:
      The contents of filename.

    Raises:
      IOError: raised if there was an error reading the file.
    """
    def _report(err):
        # Forward the error to the caller-supplied logger, if any.
        if logger:
            logger(err)

    try:
        with open(filename, 'rb') as f:
            encoding = tokenize.detect_encoding(f.readline)[0]
    except IOError as err:
        _report(err)
        raise

    try:
        with py3compat.open_with_encoding(filename, mode='r',
                                          encoding=encoding) as f:
            return f.read(), encoding
    except IOError as err:
        _report(err)
        raise
def IsFortranOrHeaderFile(filename, headers_too=True):
    """Return True if filename is a Fortran (or, optionally, header) file.

    The decision is made purely on the file extension.

    Arguments:
      filename: (unicode) The name of the file.
      headers_too: (bool) Also accept '.h' files. TODO: This can be
        dangerous — esp. when it's a C header.

    Returns:
      True if the extension matches, False otherwise.
    """
    ext = os.path.splitext(filename)[1]
    if headers_too:
        if ext in ('.F', '.F90', '.f', '.f90', '.h'):
            return True
    elif ext in ('.F', '.F90', '.f', '.f90'):
        return True
    # BUG FIX: the previous version went on to detect the file's encoding and
    # read its first line, but never inspected that line and unconditionally
    # returned False afterwards — that dead code has been removed; the net
    # behavior (False for any non-matching extension) is unchanged.
    return False
def IsPythonFile(filename):
    """Return True if filename is a Python file."""
    if os.path.splitext(filename)[1] == '.py':
        return True

    try:
        with open(filename, 'rb') as fd:
            encoding = tokenize.detect_encoding(fd.readline)[0]

        # Check for correctness of encoding.
        with py3compat.open_with_encoding(filename, mode='r',
                                          encoding=encoding) as fd:
            fd.read()
    except UnicodeDecodeError:
        encoding = 'latin-1'
    except (IOError, SyntaxError):
        # If we fail to detect encoding (or the encoding cookie is incorrect -
        # which will make detect_encoding raise SyntaxError), assume it's not a
        # Python file.
        return False

    try:
        with py3compat.open_with_encoding(filename, mode='r',
                                          encoding=encoding) as fd:
            first_line = fd.readlines()[0]
    except (IOError, IndexError):
        return False

    # BUG FIX: the docstring promises a boolean, but re.match returns a Match
    # object or None; normalize so every return path yields a bool.
    return re.match(r'^#!.*\bpython[23]?\b', first_line) is not None
def IsPythonFile(filename):
    """Return True if filename is a Python file."""
    if os.path.splitext(filename)[1] == '.py':
        return True

    # No '.py' extension: sniff the declared encoding, then look for a
    # python shebang on the first line.
    try:
        with open(filename, 'rb') as fp:
            encoding = tokenize.detect_encoding(fp.readline)[0]
        # Verify the detected encoding actually decodes the whole file.
        with py3compat.open_with_encoding(filename, encoding=encoding) as fp:
            fp.read()
    except UnicodeDecodeError:
        # The cookie/BOM lied; latin-1 can decode any byte sequence.
        encoding = 'latin-1'
    except (IOError, SyntaxError):
        # Unreadable file, or a malformed encoding cookie (detect_encoding
        # raises SyntaxError for those): treat as not-Python.
        return False

    try:
        with py3compat.open_with_encoding(filename, mode='r',
                                          encoding=encoding) as fp:
            first_line = fp.readlines()[0]
    except (IOError, IndexError):
        return False
    return re.match(r'^#!.*\bpython[23]?\b', first_line)
def IsPythonFile(filename):
    """Return True if filename is a Python file."""
    if os.path.splitext(filename)[1] == '.py':
        return True

    try:
        with open(filename, 'rb') as fd:
            encoding = tokenize.detect_encoding(fd.readline)[0]

        # Check for correctness of encoding.
        with py3compat.open_with_encoding(filename, encoding=encoding) as fd:
            fd.read()
    except UnicodeDecodeError:
        encoding = 'latin-1'
    except (IOError, SyntaxError):
        # BUG FIX: detect_encoding raises SyntaxError when the encoding cookie
        # is malformed; previously that propagated to the caller (the sibling
        # IsPythonFile variants catch it). Treat it like an unreadable file.
        return False

    try:
        with py3compat.open_with_encoding(filename, mode='r',
                                          encoding=encoding) as fd:
            first_line = fd.readlines()[0]
    except (IOError, IndexError):
        return False
    return re.match(r'^#!.*\bpython[23]?\b', first_line)
def _detect_encoding(readline): """Return file encoding.""" try: from lib2to3.pgen2 import tokenize as lib2to3_tokenize encoding = lib2to3_tokenize.detect_encoding(readline)[0] return encoding except (LookupError, SyntaxError, UnicodeDecodeError): return 'latin-1'
def openpy(filename):
    """Open a Python source file, decoding it with its declared encoding.

    Returns a TextIOWrapper positioned at the start of the file, with its
    ``mode`` attribute set to 'r' so it behaves like a text-mode file object.
    """
    from lib2to3.pgen2.tokenize import detect_encoding
    import io

    # The following is adapted from tokenize.py in Python 3.2,
    # Copyright (c) 2001-2014 Python Software Foundation; All Rights Reserved
    buffer = io.open(filename, 'rb')
    try:
        encoding, lines = detect_encoding(buffer.readline)
        buffer.seek(0)
        text = io.TextIOWrapper(buffer, encoding, line_buffering=True)
    except BaseException:
        # BUG FIX: previously a failure in detect_encoding (e.g. a bad coding
        # cookie) leaked the open binary file handle.
        buffer.close()
        raise
    text.mode = 'r'
    return text
def test_all_project_files(self):
    """Round-trip every project file through the parser (idempotency check).

    For each file: detect its encoding, parse it, re-serialize the tree, and
    fail if the output differs from the input.
    """
    for filepath in support.all_project_files():
        with open(filepath, "rb") as fp:
            encoding = tokenize.detect_encoding(fp.readline)[0]
        self.assertTrue(encoding is not None,
                        "can't detect encoding for %s" % filepath)
        with open(filepath, "r") as fp:
            source = fp.read()
        # Python 2 idiom: text-mode read yields a byte string, decoded
        # explicitly with the detected encoding.
        source = source.decode(encoding)
        tree = driver.parse_string(source)
        new = unicode(tree)
        if diff(filepath, new, encoding):
            self.fail("Idempotency failed: %s" % filepath)
def get_module_import_alias(import_name, text):
    """Return the alias under which `import_name` is imported in `text`.

    Arguments:
      import_name: the module name to look for (e.g. 'numpy').
      text: the source code to scan.

    Returns:
      The alias as the AST records it (e.g. 'np' for 'import numpy as np';
      None for a plain un-aliased import), or `import_name` itself when the
      text cannot be parsed or no matching import is found.
    """
    try:
        text = text.encode(detect_encoding(BytesIO(text.encode()).readline)[0])
    except UnicodeEncodeError:
        # Script contains unicode symbols. Cannot run detect_encoding as it
        # requires ascii; fall back to utf-8.
        text = text.encode('utf-8')

    try:
        # BUG FIX: parse once and reuse the tree (the previous version parsed
        # the text twice — once to validate, once again to walk).
        tree = ast.parse(text)
    except SyntaxError:
        # Script contains syntax errors so cannot parse text.
        return import_name

    for node in ast.walk(tree):
        if isinstance(node, ast.alias) and node.name == import_name:
            return node.asname
    return import_name
def detect_encoding(filename: str) -> str:
    """Return the file's encoding, falling back to 'latin-1' on failure."""
    from lib2to3.pgen2 import tokenize as lib2to3_tokenize

    read_mode = "rb"
    try:
        with open(filename, mode=read_mode) as source_file:
            encoding: str = lib2to3_tokenize.detect_encoding(source_file.readline)[0]  # type: ignore

        # Round-trip the file once to confirm the detected encoding
        # really decodes it.
        with open_with_encoding(filename, encoding, mode=read_mode) as source_file:
            source_file.read()
        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        # Bad cookie, unknown codec, or undecodable bytes: latin-1 decodes
        # any byte sequence.
        return "latin-1"
def detect_encoding(filename):
    """Return the file's encoding, or 'latin-1' when detection fails."""
    try:
        with open(filename, 'rb') as source_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(source_file.readline)[0]

        # Confirm the detected encoding by decoding the whole file once.
        with open_with_encoding(filename, encoding) as source_file:
            source_file.read()
        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        # Bad cookie, unknown codec, or undecodable bytes.
        return 'latin-1'
def open_with_encoding_check(filename):  # type: ignore
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    fp = io.open(filename, 'rb')
    try:
        encoding, lines = detect_encoding(fp.readline)
        fp.seek(0)
        text = io.TextIOWrapper(fp, encoding, line_buffering=True)
        text.mode = 'r'
        return text
    except BaseException:
        # BUG FIX: was a bare `except:`. Close the raw handle on *any*
        # failure (including KeyboardInterrupt) before re-raising;
        # `except BaseException:` keeps that behavior but is explicit and
        # lint-clean.
        fp.close()
        raise
def test_all_project_files(self):
    """Each project file must survive a parse/serialize round trip."""
    for path in support.all_project_files():
        with open(path, "rb") as stream:
            encoding = tokenize.detect_encoding(stream.readline)[0]
            stream.seek(0)
            source = stream.read()
        if encoding:
            source = source.decode(encoding)
        rendered = str(driver.parse_string(source))
        if encoding:
            rendered = rendered.encode(encoding)
        if diff(path, rendered):
            self.fail("Idempotency failed: %s" % path)
def test_all_project_files(self):
    """Round-trip every project file, printing progress as we go."""
    for path in support.all_project_files():
        print("Parsing %s..." % path)
        with open(path, "rb") as stream:
            encoding = tokenize.detect_encoding(stream.readline)[0]
            stream.seek(0)
            contents = stream.read()
        if encoding:
            contents = contents.decode(encoding)
        serialized = str(driver.parse_string(contents))
        if encoding:
            serialized = serialized.encode(encoding)
        if diff(path, serialized):
            self.fail("Idempotency failed: %s" % path)
def test_all_project_files(self):
    """Idempotency check: parse and re-serialize every project file.

    Skipped on Windows, where newline handling differs.
    """
    if sys.platform.startswith("win"):
        # XXX something with newlines goes wrong on Windows.
        return
    for filepath in support.all_project_files():
        with open(filepath, "rb") as fp:
            encoding = tokenize.detect_encoding(fp.readline)[0]
        self.assertTrue(encoding is not None,
                        "can't detect encoding for %s" % filepath)
        with io.open(filepath, "r", encoding=encoding) as fp:
            source = fp.read()
        tree = driver.parse_string(source)
        # `unicode(...)` marks this as Python 2 code.
        new = unicode(tree)
        if diff(filepath, new, encoding):
            self.fail("Idempotency failed: %s" % filepath)
def _read_python_source(self, filename):
    """Do our best to decode a Python source file correctly.

    Returns a (source, encoding) pair, or (None, None) when the file cannot
    be opened (the error is logged rather than raised).
    """
    try:
        raw = open(filename, "rb")
    except OSError as err:
        self.log_error("Can't open %s: %s", filename, err)
        return None, None
    # The with-statement closes the raw handle whether or not
    # detect_encoding succeeds (the original used try/finally).
    with raw:
        encoding = tokenize.detect_encoding(raw.readline)[0]
    with _open_with_encoding(filename, "r", encoding=encoding) as decoded:
        return _from_system_newlines(decoded.read()), encoding
def test_all_project_files(self):
    """Idempotency check over all project files (Python 2 flavor).

    Skipped on Windows, where newline handling differs.
    """
    if sys.platform.startswith("win"):
        # XXX something with newlines goes wrong on Windows.
        return
    for filepath in support.all_project_files():
        with open(filepath, "rb") as fp:
            encoding = tokenize.detect_encoding(fp.readline)[0]
        self.assertIsNotNone(encoding,
                             "can't detect encoding for %s" % filepath)
        with open(filepath, "r") as fp:
            source = fp.read()
        # Python 2: text-mode read yields bytes; decode explicitly.
        source = source.decode(encoding)
        tree = driver.parse_string(source)
        new = unicode(tree)
        if diff(filepath, new, encoding):
            self.fail("Idempotency failed: %s" % filepath)
def test_all_project_files(self):
    """Parse each project file and verify str(tree) round-trips it."""
    for path in support.all_project_files():
        with open(path, "rb") as fp:
            encoding = tokenize.detect_encoding(fp.readline)[0]
        self.assertTrue(encoding is not None,
                        "can't detect encoding for %s" % path)
        with open(path, "r", encoding=encoding) as fp:
            source = fp.read()
        try:
            tree = driver.parse_string(source)
        except ParseError as err:
            # Unparseable files are reported and skipped.
            print('ParseError on file', path, err)
            continue
        if diff(path, str(tree)):
            self.fail("Idempotency failed: %s" % path)
def test_all_project_files(self):
    """Idempotency check that reports the actual diff on failure."""
    for filepath in support.all_project_files():
        with open(filepath, "rb") as fp:
            encoding = tokenize.detect_encoding(fp.readline)[0]
        self.assertIsNotNone(encoding,
                             "can't detect encoding for %s" % filepath)
        with open(filepath, "r") as fp:
            source = fp.read()
        # Python 2: decode the byte string with the detected encoding.
        source = source.decode(encoding)
        tree = driver.parse_string(source)
        new = unicode(tree)
        diffResult = diff(filepath, new, encoding)
        if diffResult:
            self.fail(
                "Idempotency failed: {} using {} encoding\n{}".format(
                    filepath, encoding, diffResult))
def test_all_project_files(self):
    """Every project file must serialize back to its original text."""
    for path in support.all_project_files():
        with open(path, "rb") as fp:
            encoding = tokenize.detect_encoding(fp.readline)[0]
        self.assertIsNotNone(encoding, "can't detect encoding for %s" % path)
        with open(path, "r", encoding=encoding) as fp:
            text = fp.read()
        try:
            tree = driver.parse_string(text)
        except ParseError as err:
            # Unparseable files are reported (when verbose) and skipped.
            if verbose > 0:
                warnings.warn("ParseError on file %s (%s)" % (path, err))
            continue
        if diff(path, str(tree)):
            self.fail("Idempotency failed: %s" % path)
def parse_string(self, code_str):
    """Parse a program string and remove unwanted outer levels in AST."""
    # see lib2to3.tests.support.parse_string -- but we don't do the dedent
    # (support.reformat)
    if not isinstance(code_str, str):
        # Byte input: decode using the declared source encoding first.
        encoding, _ = tokenize2to3.detect_encoding(io.BytesIO(code_str).readline)
        code_str = str(code_str, encoding)
    features = refactor._detect_future_features(code_str)  # pylint: disable=protected-access
    driver_key = ("no_print_statement" if "print_function" in features
                  else "print_statement")
    code_ast = self._drivers[driver_key].parse_string(code_str + "\n\n",
                                                      debug=False)
    if code_ast:
        code_ast.parent = None
    return code_ast
def test_all_project_files(self):
    """Check parse/serialize idempotency for each project file."""
    for path in support.all_project_files():
        with open(path, "rb") as src:
            encoding = tokenize.detect_encoding(src.readline)[0]
        self.assertIsNotNone(encoding, "can't detect encoding for %s" % path)
        with open(path, "r", encoding=encoding) as src:
            source_text = src.read()
        try:
            tree = driver.parse_string(source_text)
        except ParseError as err:
            # Unparseable files are reported (when verbose) and skipped.
            if verbose > 0:
                warnings.warn('ParseError on file %s (%s)' % (path, err))
            continue
        if diff(path, str(tree)):
            self.fail("Idempotency failed: %s" % path)
def test_all_project_files(self):
    """Round-trip each file, retrying with the no-print-statement grammar."""
    for path in support.all_project_files():
        with open(path, "rb") as fp:
            encoding = tokenize.detect_encoding(fp.readline)[0]
        self.assertIsNotNone(encoding, "can't detect encoding for %s" % path)
        with open(path, "r", encoding=encoding) as fp:
            source = fp.read()
        try:
            tree = driver.parse_string(source)
        except ParseError:
            # Fall back to the grammar without the print statement.
            try:
                tree = driver_no_print_statement.parse_string(source)
            except ParseError as err:
                self.fail('ParseError on file %s (%s)' % (path, err))
        rendered = str(tree)
        if rendered != source:
            print(diff_texts(source, rendered, path))
            self.fail("Idempotency failed: %s" % path)
def test_all_project_files(self):
    """Idempotency check (Python 2), with an IronPython bug escape hatch.

    Skipped entirely on Windows, where newline handling differs.
    """
    if sys.platform.startswith("win"):
        # XXX something with newlines goes wrong on Windows.
        return
    for filepath in support.all_project_files():
        with open(filepath, "rb") as fp:
            encoding = tokenize.detect_encoding(fp.readline)[0]
        self.assertTrue(encoding is not None,
                        "can't detect encoding for %s" % filepath)
        with open(filepath, "r") as fp:
            source = fp.read()
        # Python 2: decode the byte string with the detected encoding.
        source = source.decode(encoding)
        tree = driver.parse_string(source)
        # Imported inside the loop in the original; kept as-is.
        from test import test_support
        if test_support.due_to_ironpython_bug(
                "http://ironpython.codeplex.com/workitem/28171"):
            continue
        new = unicode(tree)
        if diff(filepath, new, encoding):
            self.fail("Idempotency failed: %s" % filepath)
def _get_imported_from_future(code_str): """ Parse the given code and return a list of names that are imported from __future__. :param code_str: The code to parse :return list: List of names that are imported from __future__ """ future_imports = [] try: code_str = code_str.encode( detect_encoding(BytesIO(code_str.encode()).readline)[0]) except UnicodeEncodeError: # Script contains unicode symbol. Cannot run detect_encoding as it requires ascii. code_str = code_str.encode('utf-8') for node in ast.walk(ast.parse(code_str)): if isinstance(node, ast.ImportFrom): if node.module == '__future__': future_imports.extend( [import_alias.name for import_alias in node.names]) break return future_imports
def ReadFile(filename, logger=None):
    """Read the contents of the file.

    An optional logger can be specified to emit messages to your favorite
    logging stream. If specified, then no exception is raised. This is
    external so that it can be used by third-party applications.

    Arguments:
      filename: (unicode) The name of the file.
      logger: (function) A function or lambda that takes a string and emits it.

    Returns:
      The contents of filename.

    Raises:
      IOError: raised if there was an error reading the file.
    """
    def _log(err):
        # Hand the error to the caller's logger, when one was supplied.
        if logger:
            logger(err)

    try:
        with open(filename, 'rb') as f:
            encoding = tokenize.detect_encoding(f.readline)[0]
    except IOError as err:
        _log(err)
        raise

    try:
        # newline='' preserves the file's own line endings on read.
        with py3compat.open_with_encoding(filename, mode='r',
                                          encoding=encoding, newline='') as f:
            lines = f.readlines()
        line_ending = file_resources.LineEnding(lines)
        source = '\n'.join(line.rstrip('\r\n') for line in lines) + '\n'
        return source, line_ending, encoding
    except IOError as err:  # pragma: no cover
        _log(err)
        raise
def detect_encoding(filename):
    """Return file encoding, with sensible fallbacks when detection fails."""
    try:
        fobj = open(filename, 'rb')
    except (IOError, OSError):
        # A missing/unreadable file gets the same answer detect_encoding
        # gives for an empty file: utf-8.
        return 'utf-8'
    try:
        with fobj:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(fobj.readline)[0]
        # Prove the encoding by decoding the entire file once.
        with open_with_encoding(filename, encoding) as checked:
            checked.read()
        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        # Bad cookie, unknown codec, or undecodable bytes.
        return 'latin-1'
def ReadFile(filename, logger=None):
    """Read the contents of the file.

    An optional logger can be specified to emit messages to your favorite
    logging stream. If specified, then no exception is raised. This is
    external so that it can be used by third-party applications.

    Arguments:
      filename: (unicode) The name of the file.
      logger: (function) A function or lambda that takes a string and emits it.

    Returns:
      The contents of filename.

    Raises:
      IOError: raised if there was an error reading the file.
    """
    try:
        with open(filename, 'rb') as fd:
            encoding = tokenize.detect_encoding(fd.readline)[0]
    except IOError as err:
        if logger:
            logger(err)
        raise

    try:
        # Preserves line endings.
        with py3compat.open_with_encoding(
                filename, mode='r', encoding=encoding, newline='') as fd:
            lines = fd.readlines()

        # Normalize the source to '\n' endings, remembering the file's
        # dominant line ending so callers can restore it on write.
        line_ending = file_resources.LineEnding(lines)
        source = '\n'.join(line.rstrip('\r\n') for line in lines) + '\n'
        return source, line_ending, encoding
    except IOError as err:  # pragma: no cover
        if logger:
            logger(err)
        raise
def read_file_using_source_encoding(filename):
    """Read a text file, decoding it with its PEP 263 source encoding."""
    with open(filename, 'rb') as raw:
        source_encoding = tokenize.detect_encoding(raw.readline)[0]
    # Re-open in text mode now that the encoding is known.
    with io.open(filename, 'r', encoding=source_encoding) as decoded:
        return decoded.read()
def decode_string_using_source_encoding(b):
    """Decode the byte string `b` using the encoding it declares (PEP 263)."""
    source_encoding = tokenize.detect_encoding(io.BytesIO(b).readline)[0]
    return b.decode(source_encoding)
def FileEncoding(filename):
    """Return the file's encoding."""
    # detect_encoding reads at most two lines to find a BOM or coding cookie.
    with open(filename, 'rb') as source:
        return tokenize.detect_encoding(source.readline)[0]
def detect_encoding(pyFile):
    """Return the encoding of the given Python source file.

    Arguments:
      pyFile: path to the Python file.

    Returns:
      The encoding name detected from the PEP 263 cookie/BOM
      ('utf-8' by default).

    Raises:
      IOError: if the file cannot be opened.
    """
    from lib2to3.pgen2 import tokenize
    # BUG FIX: the file handle was previously never closed (resource leak);
    # a `with` block guarantees it is.
    with open(pyFile, 'rb') as f:
        return tokenize.detect_encoding(f.readline)[0]
def make_file_from_contents(path: str, contents_bytes: bytes) -> File:
    """Wrapper for File constructor.

    Computes the line offsets and creates a `File` object from
    `contents_bytes`. (`path` and `encoding` are passed through to the `File`
    object.)

    The function builds three lookup tables by decoding the bytes one at a
    time with an incremental decoder: char offset -> byte offset, its inverse,
    and line number -> char offset.
    """
    # pylint: disable=too-many-locals
    with io.BytesIO(contents_bytes) as src_f:
        try:
            encoding, _ = tokenize.detect_encoding(src_f.readline)  # type: ignore
        except LookupError as exc:
            # TODO: first arg of UnicodeDecodError is encoding, but we
            # don't know that, so this is an inappropriate error
            # to raise.
            raise UnicodeDecodeError('???', contents_bytes, 0, 1, str(exc))
    if encoding == 'utf8-sig':
        # TODO: see https://bugs.python.org/issue39155
        encoding = 'utf-8-sig'
    decoder = codecs.getincrementaldecoder(encoding)()
    # Maps the index of each decoded character to the byte offset where its
    # encoded form starts.
    chr_to_byte_offset: Dict[int, int] = {}
    chr_offset = 0
    last_byte_offset = 0
    contents_list = []
    for byte_offset, by in enumerate(contents_bytes):
        # TODO: benchmark other methods of converting an int to a byte:
        #   by.to_bytes(1, sys.byteorder, signed=False))
        #   struct.unpack('1c', by))[0]
        #   chr(by).encode('latin1')
        # (probably these are all dwarfed by the time used
        # to process the AST)
        # Feed one byte at a time; the decoder emits a character only once a
        # complete multi-byte sequence has been consumed.
        ch = decoder.decode(bytes([by]))  # Can raise UnicodeDecodeError
        if ch:
            contents_list.append(ch)
            assert chr_offset not in chr_to_byte_offset
            chr_to_byte_offset[chr_offset] = last_byte_offset
            chr_offset += 1
            last_byte_offset = byte_offset + 1
    final_by = decoder.decode(b'', True)  # flush
    assert final_by == '', final_by
    contents_str = ''.join(contents_list)
    # Ast uses [start,end), so need to also have the last+1 offset:
    assert chr_offset not in chr_to_byte_offset
    chr_to_byte_offset[chr_offset] = last_byte_offset
    # Line numbers are 1-based; line 1 starts at char offset 0.
    lineno_to_chr_offset = {1: 0}
    lineno = 1
    for offset, char in enumerate(contents_str):
        # TODO: make this work with Windows '\r\n', Mac '\r'
        # e.g., use contents_str.splitlines(keepends=True)
        # (see code in ast_color.ColorFile._color_whitespace).
        if char == '\n':
            lineno += 1
            lineno_to_chr_offset[lineno] = offset + 1
    byte_to_chr_offset = {v: k for k, v in chr_to_byte_offset.items()}
    assert len(byte_to_chr_offset) == len(chr_to_byte_offset)  # no dup k,v / v,k
    return File(path=path,
                contents_bytes=contents_bytes,
                contents_str=contents_str,
                encoding=encoding,
                lineno_to_chr_offset=lineno_to_chr_offset,
                chr_to_byte_offset=chr_to_byte_offset,
                byte_to_chr_offset=byte_to_chr_offset,
                chr_offsets_for_linenos=sorted(
                    (offset, lineno)
                    for lineno, offset in lineno_to_chr_offset.items()),
                numlines=lineno - 1)
def read_file_using_source_encoding(filename):
    """Read `filename` as text, honoring its declared source encoding."""
    with open(filename, 'rb') as byte_stream:
        encoding = tokenize.detect_encoding(byte_stream.readline)[0]
    # Re-open in text mode now that the encoding is known.
    with io.open(filename, 'r', encoding=encoding) as text_stream:
        return text_stream.read()