def test_filename_in_exception(self):
    # When possible, include the file name in the exception.
    path = 'some_file_path'
    lines = (
        b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
    )

    class Bunk:
        def __init__(self, lines, path):
            self.name = path
            self._lines = lines
            self._index = 0

        def readline(self):
            if self._index == len(self._lines):
                raise StopIteration
            line = self._lines[self._index]
            self._index += 1
            return line

    with self.assertRaises(SyntaxError):
        ins = Bunk(lines, path)
        # Make sure lacking a name isn't an issue.
        del ins.name
        detect_encoding(ins.readline)
    with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
        ins = Bunk(lines, path)
        detect_encoding(ins.readline)

def read_py_file(filepath):
    if sys.version_info < (3,):
        return open(filepath, 'rU').read()
    else:
        # see https://docs.python.org/3/library/tokenize.html#tokenize.detect_encoding
        # first just see if the file is properly encoded
        try:
            with open(filepath, 'rb') as f:
                tokenize.detect_encoding(f.readline)
        except SyntaxError as err:
            # SyntaxError is raised:
            # (1) in badly authored files (non-utf-8 bytes in a comment line),
            # (2) when a coding cookie is specified but wrong,
            # (3) when no coding is specified and the default 'utf-8' fails
            #     to decode, and
            # (4) when the encoding in a PEP 263 declaration does not match
            #     the encoding detected from the BOM.
            raise CouldNotHandleEncoding(filepath, err)
        try:
            return tokenize.open(filepath).read()
        except UnicodeDecodeError as err:
            # UnicodeDecodeError is raised if, e.g., utf-8 is declared but
            # latin-1 bytes such as \xe9 appear
            # (see http://stackoverflow.com/a/5552623)
            raise CouldNotHandleEncoding(filepath, err)

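# Illustrative sketch (not part of the module above): the SyntaxError branch
# in read_py_file() can be exercised directly by handing detect_encoding() a
# buffer whose coding cookie names a codec Python does not know (case 2).
import io
import tokenize

bogus = io.BytesIO(b'# -*- coding: not-a-real-codec -*-\nprint("hi")\n')
try:
    tokenize.detect_encoding(bogus.readline)
except SyntaxError as err:
    print(err)  # "unknown encoding: not-a-real-codec"
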
def update_fileinfo(cls, fileinfo, document=None):
    import io
    import tokenize
    if not document:
        try:
            with open(fileinfo.fullpathname, 'rb') as buffer:
                encoding, lines = tokenize.detect_encoding(buffer.readline)
                fileinfo.encoding = encoding
        except IOError:
            pass
    else:
        s = document.gettext(0, 1024).encode('utf-8', errors='ignore')
        buffer = io.BytesIO(s)
        encoding, lines = tokenize.detect_encoding(buffer.readline)
        fileinfo.encoding = encoding

def patch(self, filename):
    self.current_file = filename

    with tokenize.open(filename) as fp:
        content = fp.read()

    old_content = content
    for operation in self.operations:
        content = operation.patch(content)

    if content == old_content:
        # no change
        self.check(content)
        if self.options.to_stdout:
            self.write_stdout(content)
        return False

    with open(filename, "rb") as fp:
        encoding, _ = tokenize.detect_encoding(fp.readline)

    if not self.options.quiet:
        print("Patch %s" % filename)

    if not self.options.to_stdout:
        with open(filename, "w", encoding=encoding) as fp:
            fp.write(content)
    else:
        self.write_stdout(content)

    self.check(content)
    return True

def open_source_file(filename):
    # pylint: disable=consider-using-with
    with open(filename, "rb") as byte_stream:
        encoding = detect_encoding(byte_stream.readline)[0]
    stream = open(filename, newline=None, encoding=encoding)
    data = stream.read()
    return stream, encoding, data

def insert_suppressions(
    source: bytes,
    comments: Iterable[SuppressionComment],
    *,
    code_width: int = DEFAULT_CODE_WIDTH,
    min_comment_width: int = DEFAULT_MIN_COMMENT_WIDTH,
) -> InsertSuppressionsResult:
    """
    Given `source` bytes, forms a new sequence of lines with `comments`
    inserted before the lines they target.
    """
    encoding = tokenize.detect_encoding(BytesIO(source).readline)[0]
    tokens = tuple(tokenize.tokenize(BytesIO(source).readline))
    indentations = _get_indentations(tokens)
    physical_to_logical = LineMappingInfo.compute(tokens=tokens).physical_to_logical
    comments_queue = deque(sorted(comments))  # sort by line number

    updated_lines = []
    for line_number, line_bytes in enumerate(BytesIO(source).readlines(), start=1):
        while comments_queue:
            target_line = physical_to_logical[comments_queue[0].before_line]
            if target_line == line_number:
                indent = indentations[line_number]
                width = max(code_width - len(indent), min_comment_width)
                for line in comments_queue.popleft().to_lines(width):
                    updated_lines.append(f"{indent}{line}\n".encode(encoding))
            else:
                break
        updated_lines.append(line_bytes)

    return InsertSuppressionsResult(
        updated_source=b"".join(updated_lines),
        failed_insertions=tuple(comments_queue),
    )

def roundtrip(filename, output=sys.stdout):
    with open(filename, "rb") as pyfile:
        encoding = tokenize.detect_encoding(pyfile.readline)[0]
    with open(filename, "r", encoding=encoding) as pyfile:
        source = pyfile.read()
    tree = compile(source, filename, "exec", ast.PyCF_ONLY_AST)
    Unparser(tree, output)

def get_source(self, fullname):
    """Concrete implementation of InspectLoader.get_source."""
    path = self.get_filename(fullname)
    try:
        source_bytes = self.get_data(path)
    except IOError:
        raise ImportError("source not available through get_data()")
    if py3k:
        import io, tokenize
        readsource = io.BytesIO(source_bytes).readline
        try:
            encoding = tokenize.detect_encoding(readsource)
        except SyntaxError as exc:
            raise ImportError("Failed to detect encoding")
        newline_decoder = io.IncrementalNewlineDecoder(None, True)
        try:
            return newline_decoder.decode(source_bytes.decode(encoding[0]))
        except UnicodeDecodeError as exc:
            raise ImportError("Failed to decode source file")
    else:
        return source_bytes  # XXX proper encoding

def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, using the encoding declared inside the file.

    Parameters
    ----------
    url : str
        The URL from which to fetch the file.
    errors : str
        How to handle decoding errors in the file. Options are the same
        as for bytes.decode(), but here 'replace' is the default.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in
        the first two lines, that line will be excluded from the output -
        compiling a unicode string with an encoding declaration is a
        SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    response = urllib.request.urlopen(url)
    buffer = io.BytesIO(response.read())
    encoding, lines = detect_encoding(buffer.readline)
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
    text.mode = 'r'
    if skip_encoding_cookie:
        return "".join(strip_encoding_cookie(text))
    else:
        return text.read()

def __init__(self, source: IO, modname: str, srcname: str,
             decoded: bool = False) -> None:
    self.modname = modname  # name of the module
    self.srcname = srcname  # name of the source file

    # cache the source code as well
    pos = source.tell()
    if not decoded:
        warnings.warn('decode option for ModuleAnalyzer is deprecated.',
                      RemovedInSphinx40Warning, stacklevel=2)
        self._encoding, _ = tokenize.detect_encoding(source.readline)
        source.seek(pos)
        self.code = source.read().decode(self._encoding)
    else:
        self._encoding = None
        self.code = source.read()

    # will be filled by parse()
    self.annotations = None  # type: Dict[Tuple[str, str], str]
    self.attr_docs = None  # type: Dict[Tuple[str, str], List[str]]
    self.finals = None  # type: List[str]
    self.overloads = None  # type: Dict[str, List[Signature]]
    self.tagorder = None  # type: Dict[str, int]
    self.tags = None  # type: Dict[str, Tuple[str, int, int]]

def test_cookie_second_line_empty_first_line(self):
    lines = (b'\n', b'# vim: set fileencoding=iso8859-15 :\n',
             b"print('\xe2\x82\xac')\n")
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'iso8859-15')
    expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
    self.assertEqual(consumed_lines, expected)

def test_matched_bom_and_cookie_second_line(self):
    lines = (b'\xef\xbb\xbf#! something\n', b'# coding=utf-8\n',
             b'print(something)\n', b'do_something(else)\n')
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8-sig')
    self.assertEqual(consumed_lines,
                     [b'#! something\n', b'# coding=utf-8\n'])

def test_cookie_second_line_no_bom(self):
    lines = (b'#! something\n', b'# vim: set fileencoding=ascii :\n',
             b'print(something)\n', b'do_something(else)\n')
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'ascii')
    expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
    self.assertEqual(consumed_lines, expected)

def test_bom_no_cookie(self):
    lines = (b'\xef\xbb\xbf# something\n', b'print(something)\n',
             b'do_something(else)\n')
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8-sig')
    self.assertEqual(consumed_lines,
                     [b'# something\n', b'print(something)\n'])

def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way.  If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems.  The diagnostic messages are
    written to standard output using the print statement.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if (os.path.isdir(fullname) and
                    not os.path.islink(fullname) or
                    os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    with open(file, 'rb') as f:
        encoding, lines = tokenize.detect_encoding(f.readline)

    try:
        f = open(file, encoding=encoding)
    except IOError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    if verbose > 1:
        print("checking %r ..." % file)

    try:
        process_tokens(tokenize.generate_tokens(f.readline))
    except tokenize.TokenError as msg:
        errprint("%r: Token Error: %s" % (file, msg))
        return
    except IndentationError as msg:
        errprint("%r: Indentation Error: %s" % (file, msg))
        return
    except NannyNag as nag:
        badline = nag.get_lineno()
        line = nag.get_line()
        if verbose:
            print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
            print("offending line: %r" % (line,))
            print(nag.get_msg())
        else:
            if ' ' in file:
                file = '"' + file + '"'
            if filename_only:
                print(file)
            else:
                print(file, badline, repr(line))
        return

    if verbose:
        print("%r: Clean bill of health." % (file,))

def read_text_file(filename, encoding=None):
    """Read text file.

    Give back the contents, and the encoding we used.

    Unless specified manually, we have no way of knowing what text
    encoding this file may be in.  On Python 3 we can use tokenize to
    detect the encoding; on Python 2 we can use chardet.  Otherwise we
    fall back to PEP 263-style hints and finally to utf-8.
    """
    # Only if the encoding is not manually specified do we try to
    # detect it.
    if encoding is None and detect_encoding is not None:
        with open(filename, 'rb') as filehandler:
            encoding = detect_encoding(filehandler.readline)[0]

    with open(filename, 'rb') as filehandler:
        data = filehandler.read()

    if encoding is not None:
        return data.decode(encoding), encoding

    if HAVE_CHARDET:
        encoding_result = chardet.detect(data)
        if encoding_result and encoding_result['encoding'] is not None:
            encoding = encoding_result['encoding']
            return data.decode(encoding), encoding

    # Look for hints, PEP263-style
    if data[:3] == b'\xef\xbb\xbf':
        encoding = 'utf-8'
        return data.decode(encoding), encoding

    data_len = len(data)
    for canary in ENCODING_HINTS:
        if canary in data:
            pos = data.index(canary)
            # Use slicing rather than indexing so this works on both
            # Python 2 and Python 3, where indexing bytes yields an int.
            if pos > 1 and data[pos - 1:pos] not in (b' ', b'\n', b'\r'):
                continue
            pos += len(canary)
            coding = b''
            while pos < data_len and data[pos:pos + 1] not in (b' ', b'\n'):
                coding += data[pos:pos + 1]
                pos += 1
            encoding = coding.decode('ascii').strip()
            try:
                return data.decode(encoding), encoding
            except (LookupError, UnicodeError):
                # Try the next one
                pass

    # Fall back to utf-8
    encoding = 'utf-8'
    return data.decode(encoding), encoding

def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Converts a bytes string with python source code to unicode.

    Unicode strings are passed through unchanged. Byte strings are checked
    for the python source file encoding cookie to determine encoding.
    txt can be either a bytes buffer or a string containing the source
    code.
    """
    if isinstance(txt, unicode):
        return txt
    if isinstance(txt, bytes):
        buffer = BytesIO(txt)
    else:
        buffer = txt
    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        encoding = "ascii"
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
    text.mode = 'r'
    if skip_encoding_cookie:
        return u"".join(strip_encoding_cookie(text))
    else:
        return text.read()

def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Converts a bytes string with python source code to unicode.

    Unicode strings are passed through unchanged. Byte strings are checked
    for the python source file encoding cookie to determine encoding.
    txt can be either a bytes buffer or a string containing the source
    code.
    """
    if isinstance(txt, unicode_type):
        return txt
    if isinstance(txt, bytes):
        buf = BytesIO(txt)
    else:
        buf = txt
    try:
        encoding, _ = detect_encoding(buf.readline)
    except SyntaxError:
        encoding = "ascii"
    buf.seek(0)
    text = TextIOWrapper(buf, encoding, errors=errors, line_buffering=True)
    text.mode = 'r'
    if skip_encoding_cookie:
        return u"".join(strip_encoding_cookie(text))
    else:
        return text.read()

def execute(self):
    # Try to detect the encoding for you.
    with open(self.script, 'rb') as file:
        try:
            encoding = tokenize.detect_encoding(file.readline)[0]
        except SyntaxError:
            encoding = "utf-8"

    # Set the global values for the module.
    global_values = {
        '__file__': self.script,  # Use actual filename of the script.
        '__name__': '__main__',   # Make sure the 'if __name__ == "__main__"' hook works.
    }

    with open(self.script, 'r', encoding=encoding) as file:
        # Do not inherit any 'from __future__ import ...' statements
        # that may be used by AnimaFX.
        # Additionally set the current filename.
        module = compile(file.read(), self.script, 'exec', False)

    try:
        exec(module, global_values)

    # Reraise any occurring exceptions
    except (SystemExit, KeyboardInterrupt) as e:
        raise e

    # Print the exception
    except BaseException as e:
        traceback.print_exception(e.__class__, e, e.__traceback__)
        return False

    return True

def encode(self, chars):
    if isinstance(chars, bytes):
        # This is either plain ASCII, or Tk was returning mixed-encoding
        # text to us. Don't try to guess further.
        return chars

    # Preserve a BOM that might have been present on opening
    if self.fileencoding == 'utf-8-sig':
        return chars.encode('utf-8-sig')

    # See whether there is anything non-ASCII in it.
    # If not, no need to figure out the encoding.
    try:
        return chars.encode('ascii')
    except UnicodeEncodeError:
        pass

    # Check if there is an encoding declared
    try:
        encoded = chars.encode('ascii', 'replace')
        enc, _ = tokenize.detect_encoding(io.BytesIO(encoded).readline)
        return chars.encode(enc)
    except SyntaxError as err:
        failed = str(err)
    except UnicodeEncodeError:
        failed = "Invalid encoding '%s'" % enc

    messagebox.showerror(
        "I/O Error",
        "%s.\nSaving as UTF-8" % failed,
        parent=self.text)

    # Fallback: save as UTF-8, with BOM - ignoring the incorrect
    # declared encoding
    return chars.encode('utf-8-sig')

def _readSourceCodeFromFilename3(source_filename):
    import tokenize

    try:
        with open(source_filename, "rb") as source_file:
            encoding = tokenize.detect_encoding(source_file.readline)[0]  # @UndefinedVariable

            # Rewind to get the whole file.
            source_file.seek(0)
            source_code = source_file.read()

        return source_code.decode(encoding)
    except SyntaxError as e:
        if Options.isFullCompat():
            if PythonVersions.doShowUnknownEncodingName():
                match = re.match("unknown encoding for '.*?': (.*)", e.args[0])
                complaint = match.group(1)
            else:
                complaint = "with BOM"

            e.args = (
                "encoding problem: %s" % complaint,
                (source_filename, 1, None, None)
            )

            if hasattr(e, "msg"):
                e.msg = e.args[0]

        raise

def _LoadModule(self, name, fp, path, info, deferredImports, parent=None,
                namespace=False):
    """Load the module, given the information acquired by the finder."""
    suffix, mode, type = info
    if type == imp.PKG_DIRECTORY:
        return self._LoadPackage(name, path, parent, deferredImports,
                                 namespace)
    module = self._AddModule(name, file_name=path, parent=parent)

    if type == imp.PY_SOURCE:
        logging.debug("Adding module [%s] [PY_SOURCE]", name)
        # Load & compile Python source code
        fp = open(path, "rb")
        encoding, lines = tokenize.detect_encoding(fp.readline)
        fp = open(path, "U", encoding=encoding)
        codeString = fp.read()
        if codeString and codeString[-1] != "\n":
            codeString = codeString + "\n"
        try:
            module.code = compile(codeString, path, "exec")
        except SyntaxError:
            raise ImportError("Invalid syntax in %s" % path)
    elif type == imp.PY_COMPILED:
        logging.debug("Adding module [%s] [PY_COMPILED]", name)
        # Load Python bytecode
        if isinstance(fp, bytes):
            magic = fp[:4]
        else:
            magic = fp.read(4)
        if magic != imp.get_magic():
            raise ImportError("Bad magic number in %s" % path)
        skip_bytes = 8
        if isinstance(fp, bytes):
            module.code = marshal.loads(fp[skip_bytes + 4:])
            module.source_is_zip_file = True
        else:
            fp.read(skip_bytes)
            module.code = marshal.load(fp)
    elif type == imp.C_EXTENSION:
        logging.debug("Adding module [%s] [C_EXTENSION]", name)

    # If there's a custom hook for this module, run it.
    self._RunHook("load", module.name, module)

    if module.code is not None:
        if self.replace_paths:
            topLevelModule = module
            while topLevelModule.parent is not None:
                topLevelModule = topLevelModule.parent
            module.code = self._ReplacePathsInCode(topLevelModule,
                                                   module.code)

        # Scan the module code for import statements
        self._ScanCode(module.code, module, deferredImports)

    module.in_import = False
    return module

def _read_file(filename):
    # read the file contents, obeying the python encoding marker
    with open(filename, 'rb') as fp:
        encoding, _ = tokenize.detect_encoding(fp.readline)
    with open(filename, 'rt', encoding=encoding) as fp:
        content = fp.read()
    content += '\n\n'
    return content

def read_source_code(filename):
    with open(filename, 'rb') as source_file:
        encoding, first_lines = tokenize.detect_encoding(source_file.readline)
        source_bytes = b''.join(first_lines) + source_file.read()

    newline_decoder = io.IncrementalNewlineDecoder(None, translate=True)
    source_code = newline_decoder.decode(source_bytes.decode(encoding))
    return source_code.splitlines(True)

def read_pyfile(filename):
    """Read and return the contents of a Python source file (as a string),
    taking into account the file encoding."""
    with open(filename, "rb") as pyfile:
        encoding = tokenize.detect_encoding(pyfile.readline)[0]
    with open(filename, "r", encoding=encoding) as pyfile:
        source = pyfile.read()
    return source

def _stdin_get_value_py3():
    # type: () -> str
    stdin_value = sys.stdin.buffer.read()
    fd = io.BytesIO(stdin_value)
    try:
        coding, _ = tokenize.detect_encoding(fd.readline)
        return stdin_value.decode(coding)
    except (LookupError, SyntaxError, UnicodeError):
        return stdin_value.decode("utf-8")

def _stdin_get_value_py3():
    stdin_value = sys.stdin.buffer.read()
    fd = io.BytesIO(stdin_value)
    try:
        (coding, lines) = tokenize.detect_encoding(fd.readline)
        return io.StringIO(stdin_value.decode(coding))
    except (LookupError, SyntaxError, UnicodeError):
        return io.StringIO(stdin_value.decode("utf-8"))

def test_cookie_second_line_noncommented_first_line(self):
    lines = (b"print('\xc2\xa3')\n",
             b'# vim: set fileencoding=iso8859-15 :\n',
             b"print('\xe2\x82\xac')\n")
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8')
    expected = [b"print('\xc2\xa3')\n"]
    self.assertEqual(consumed_lines, expected)

def _LoadModule(self, name, fp, path, info, deferredImports, parent=None):
    """Load the module, given the information acquired by the finder."""
    suffix, mode, type = info
    if type == imp.PKG_DIRECTORY:
        return self._LoadPackage(name, path, parent, deferredImports)
    module = self._AddModule(name, file_name=path, parent=parent)

    if type == imp.PY_SOURCE:
        logging.debug("Adding module [%s] [PY_SOURCE]", name)
        # Load & compile Python source code: if the file is already open,
        # it was opened with the correct encoding; otherwise detect it
        # manually.
        if not fp:
            with open(path, "rb") as f:
                encoding = tokenize.detect_encoding(f.readline)[0]
            fp = open(path, "r", encoding=encoding)
        codeString = fp.read()
        if codeString and codeString[-1] != "\n":
            codeString = codeString + "\n"
        try:
            module.code = compile(codeString, path, "exec",
                                  optimize=self.optimizeFlag)
        except SyntaxError:
            raise ImportError("Invalid syntax in %s" % path)
    elif type == imp.PY_COMPILED:
        logging.debug("Adding module [%s] [PY_COMPILED]", name)
        # Load Python bytecode
        if isinstance(fp, bytes):
            fp = io.BytesIO(fp)
            module.source_is_zip_file = True
        module.code = pkgutil.read_code(fp)
        if module.code is None:
            raise ImportError("Bad magic number in %s" % path)
    elif type == imp.C_EXTENSION:
        logging.debug("Adding module [%s] [C_EXTENSION]", name)

    # If there's a custom hook for this module, run it.
    self._RunHook("load", module.name, module)

    if module.code is not None:
        if self.replace_paths:
            topLevelModule = module
            while topLevelModule.parent is not None:
                topLevelModule = topLevelModule.parent
            module.code = self._ReplacePathsInCode(topLevelModule,
                                                   module.code)

        # Scan the module code for import statements
        self._ScanCode(module.code, module, deferredImports)

        # Verify __package__ in use
        self._ReplacePackageInCode(module)

    module.in_import = False
    return module

def check(file, depth):
    if depth > 1 and os.path.isfile(os.path.join(file, ".git")):
        return
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                    not os.path.islink(fullname) and
                    not os.path.split(fullname)[1].startswith(".")) or
                    name.lower().endswith(".py")):
                check(fullname, depth + 1)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    with open(file, 'rb') as f:
        try:
            encoding, _ = tokenize.detect_encoding(f.readline)
        except SyntaxError as se:
            errprint("%s: SyntaxError: %s" % (file, str(se)))
            return
    try:
        print(file)
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    newline = spec_newline if spec_newline else r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot continue without "
                 "--newline" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            with open(file, "w", encoding=encoding, newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False

def test_cookie_first_line_no_bom(self):
    lines = (
        b'# -*- coding: latin-1 -*-\n',
        b'print(something)\n',
        b'do_something(else)\n'
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'iso-8859-1')
    self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

def test_no_bom_no_encoding_cookie(self):
    lines = (
        b'# something\n',
        b'print(something)\n',
        b'do_something(else)\n'
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8')
    self.assertEqual(consumed_lines, list(lines[:2]))

def _detect_encoding(source: bytes) -> str:
    """Return the name of the encoding of `source`, as detected from its
    BOM or PEP 263 coding cookie (defaulting to 'utf-8')."""
    return tokenize.detect_encoding(io.BytesIO(source).readline)[0]

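# Usage sketch for _detect_encoding() (illustrative, not from the original
# module): tokenize normalizes cookie spellings, so "latin-1" is reported
# as "iso-8859-1", and sources without a BOM or cookie fall back to "utf-8".
print(_detect_encoding(b'# -*- coding: latin-1 -*-\nx = 1\n'))  # iso-8859-1
print(_detect_encoding(b'x = 1\n'))                             # utf-8
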
def _get_complexity(src_code):
    to_count = 'print'
    encoding = detect_encoding(
        (l.encode() for l in src_code.split(os_linesep, 1)).__next__)[0]
    with BytesIO(src_code.encode(encoding, 'ignore')) as src_stream:
        return dict(
            Counter(t[1] for t in tokenize(src_stream.readline)
                    if t[0] is t_name and t[1] == to_count))

def test_matched_bom_and_cookie_first_line(self):
    lines = (
        b'\xef\xbb\xbf# coding=utf-8\n',
        b'print(something)\n',
        b'do_something(else)\n'
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8-sig')
    self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

def load_settings(filename, settings):
    encoding = 'utf-8'
    with open(filename, 'rb') as fp:
        try:
            encoding = tokenize.detect_encoding(fp.readline)[0]
        except SyntaxError:
            pass

    with open(filename, 'r', encoding=encoding) as fp:
        exec(compile(fp.read(), filename, 'exec'), settings, settings)

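# Hypothetical usage of load_settings(): the settings file is ordinary
# Python, executed with the `settings` dict serving as both globals and
# locals. The file name and key below are made up for illustration.
settings = {}
load_settings('app_settings.py', settings)  # assumes such a file exists
print(settings.get('DEBUG'))
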
def open_source_file(filename):
    byte_stream = open(filename, 'rb')
    encoding = detect_encoding(byte_stream.readline)[0]
    stream = open(filename, 'U', encoding=encoding)
    try:
        data = stream.read()
    except UnicodeError:  # wrong encoding
        # detect_encoding returns utf-8 if no encoding specified
        msg = 'Wrong (%s) or no encoding specified' % encoding
        raise ASTNGBuildingException(msg)
    return stream, encoding, data

def _read(filename):
    try:
        with open(filename, 'rb') as f:
            (encoding, _) = tokenize.detect_encoding(f.readline)
    except (LookupError, SyntaxError, UnicodeError):
        # Fall back if file encoding is improperly declared
        with open(filename, encoding='latin-1') as f:
            return f.readlines()
    with open(filename, 'r', encoding=encoding) as f:
        return f.readlines()

def _source_encoding_py3(source):
    """Determine the encoding for `source`, according to PEP 263.

    `source` is a byte string: the text of the program.

    Returns a string, the name of the encoding.
    """
    readline = iternext(source.splitlines(True))
    return tokenize.detect_encoding(readline)[0]

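# `iternext` is a helper from the surrounding compatibility module; a
# minimal sketch of what it must provide, assuming it simply turns a
# sequence into the no-argument "next line" callable detect_encoding()
# expects (detect_encoding() treats StopIteration as end of input):
def iternext(sequence):
    return iter(sequence).__next__
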
def test_short_files(self):
    readline = self.get_readline((b'print(something)\n',))
    encoding, consumed_lines = detect_encoding(readline)
    self.assertEqual(encoding, 'utf-8')
    self.assertEqual(consumed_lines, [b'print(something)\n'])

    encoding, consumed_lines = detect_encoding(self.get_readline(()))
    self.assertEqual(encoding, 'utf-8')
    self.assertEqual(consumed_lines, [])

    readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
    encoding, consumed_lines = detect_encoding(readline)
    self.assertEqual(encoding, 'utf-8')
    self.assertEqual(consumed_lines, [b'print(something)\n'])

    readline = self.get_readline((b'\xef\xbb\xbf',))
    encoding, consumed_lines = detect_encoding(readline)
    self.assertEqual(encoding, 'utf-8')
    self.assertEqual(consumed_lines, [])

def tokopen(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    buf = io.open(filename, "rb")  # Tweaked to use io.open for Python 2
    encoding, lines = detect_encoding(buf.readline)
    buf.seek(0)
    text = io.TextIOWrapper(buf, encoding, line_buffering=True)
    text.mode = "r"
    return text

def open(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    buffer = io.open(filename, 'rb')  # Tweaked to use io.open for Python 2
    encoding, lines = detect_encoding(buffer.readline)
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, line_buffering=True)
    text.mode = 'r'
    return text

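# Demonstration (not part of the original module): write a Latin-1 file
# with a PEP 263 cookie, then read it back through the open() defined above.
import os
import tempfile

with tempfile.NamedTemporaryFile('wb', suffix='.py', delete=False) as tmp:
    tmp.write(b'# -*- coding: latin-1 -*-\ns = "caf\xe9"\n')
text = open(tmp.name)  # the custom open() above, not the builtin
try:
    print(text.read())  # the \xe9 byte decodes as 'é'
finally:
    text.close()
    os.remove(tmp.name)
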
def load_setup():
    """Run the setup script (i.e. the setup.py file).

    This function loads the setup file in all cases (even if it has
    already been loaded before), because we are monkey-patching its
    setup function with a particular one.
    """
    with open("setup.py", "rb") as f:
        encoding, lines = detect_encoding(f.readline)
    with open("setup.py", encoding=encoding) as f:
        imp.load_module("setup", f, "setup.py", (".py", "r", imp.PY_SOURCE))

def open_source_file(filename):
    byte_stream = open(filename, 'rb')
    encoding = detect_encoding(byte_stream.readline)[0]
    stream = open(filename, 'U', encoding=encoding)
    try:
        data = stream.read()
    except UnicodeError:  # wrong encoding
        # detect_encoding returns utf-8 if no encoding specified
        msg = 'Wrong (%s) or no encoding specified' % encoding
        raise AstroidBuildingException(msg)
    return stream, encoding, data

def test_cookie_second_line_noncommented_first_line(self):
    lines = (
        b"print('\xc2\xa3')\n",
        b'# vim: set fileencoding=iso8859-15 :\n',
        b"print('\xe2\x82\xac')\n"
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8')
    expected = [b"print('\xc2\xa3')\n"]
    self.assertEqual(consumed_lines, expected)

def test_utf8_normalization(self):
    # See get_normal_name() in tokenizer.c.
    encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
    for encoding in encodings:
        for rep in ("-", "_"):
            enc = encoding.replace("-", rep)
            lines = (b"#!/usr/bin/python\n",
                     b"# coding: " + enc.encode("ascii") + b"\n",
                     b"1 + 3\n")
            rl = self.get_readline(lines)
            found, consumed_lines = detect_encoding(rl)
            self.assertEqual(found, "utf-8")

def test_cookie_second_line_empty_first_line(self):
    lines = (
        b'\n',
        b'# vim: set fileencoding=iso8859-15 :\n',
        b"print('\xe2\x82\xac')\n"
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'iso8859-15')
    expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
    self.assertEqual(consumed_lines, expected)

def test_bom_no_cookie(self):
    lines = (
        b'\xef\xbb\xbf# something\n',
        b'print(something)\n',
        b'do_something(else)\n'
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8')
    self.assertEqual(consumed_lines,
                     [b'# something\n', b'print(something)\n'])

def stdin_get_value() -> str:
    """Get and cache it so plugins can use it."""
    stdin_value = sys.stdin.buffer.read()
    fd = io.BytesIO(stdin_value)
    try:
        coding, _ = tokenize.detect_encoding(fd.readline)
        fd.seek(0)
        return io.TextIOWrapper(fd, coding).read()
    except (LookupError, SyntaxError, UnicodeError):
        return stdin_value.decode("utf-8")

def _LoadModule(self, name, fp, path, info, deferredImports, parent=None,
                namespace=False):
    """Load the module, given the information acquired by the finder."""
    suffix, mode, type = info
    if type == imp.PKG_DIRECTORY:
        return self._LoadPackage(name, path, parent, deferredImports,
                                 namespace)
    module = self._AddModule(name)
    module.file = path
    module.parent = parent

    if type == imp.PY_SOURCE:
        # Load & compile Python source code
        if sys.version_info[0] >= 3:
            # For Python 3, read the file with the correct encoding
            import tokenize
            fp = open(path, "rb")
            encoding, lines = tokenize.detect_encoding(fp.readline)
            fp = open(path, "U", encoding=encoding)
        codeString = fp.read()
        if codeString and codeString[-1] != "\n":
            codeString = codeString + "\n"
        module.code = compile(codeString, path, "exec")
    elif type == imp.PY_COMPILED:
        # Load Python bytecode
        if isinstance(fp, str):
            magic = fp[:4]
        else:
            magic = fp.read(4)
        if magic != imp.get_magic():
            raise ImportError("Bad magic number in %s" % path)
        if isinstance(fp, str):
            module.code = marshal.loads(fp[8:])
            module.inZipFile = True
        else:
            fp.read(4)
            module.code = marshal.load(fp)

    # If there's a custom hook for this module, run it.
    self._RunHook("load", module.name, module)

    if module.code is not None:
        if self.replacePaths:
            topLevelModule = module
            while topLevelModule.parent is not None:
                topLevelModule = topLevelModule.parent
            module.code = self._ReplacePathsInCode(topLevelModule,
                                                   module.code)

        # Scan the module code for import statements
        self._ScanCode(module.code, module, deferredImports)

    module.inImport = False
    return module

def decode_source(source_bytes):  # copied from _bootstrap_external.py
    """Decode bytes representing source code and return the string.

    Universal newline support is used in the decoding.
    """
    import _io
    import tokenize  # To avoid bootstrap issues.
    source_bytes_readline = _io.BytesIO(source_bytes).readline
    encoding = tokenize.detect_encoding(source_bytes_readline)
    newline_decoder = _io.IncrementalNewlineDecoder(None, True)
    return newline_decoder.decode(source_bytes.decode(encoding[0]))

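# Example (illustrative): the coding cookie selects the codec, and the
# IncrementalNewlineDecoder folds '\r\n' into '\n'.
src = b'# coding: latin-1\r\nname = "caf\xe9"\r\n'
print(decode_source(src))  # '# coding: latin-1\nname = "café"\n'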