def _read_lines(self, file_path):
    """Read the file at a path, and return its lines.

    Raises:
      IOError: If the file does not exist or cannot be read.
    """
    # "-" follows the UNIX convention of reading from stdin.
    if file_path == '-':
        stream = codecs.StreamReaderWriter(sys.stdin,
                                           codecs.getreader('utf8'),
                                           codecs.getwriter('utf8'),
                                           'replace')
    else:
        # The file is not opened with universal newline support
        # (codecs does not support it anyway), so lines read from a
        # CRLF file keep their trailing "\r" characters.
        # FIXME: This should use self.filesystem
        stream = codecs.open(file_path, 'r', 'utf8', 'replace')
    try:
        contents = stream.read()
    finally:
        stream.close()
    return contents.split('\n')
def ProcessErrorList(_filename):
    """Parse an error log and group its entries by source filename.

    Args:
      _filename: Path of the log file to read, or '-' for stdin.

    Returns:
      Dict mapping each source filename to the list of __ErrorInfo
      entries that refer to it.
    """
    error_dic_list = {}
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # NOTE(review): the original comment claimed the trailing '\r' is
    # removed below, but no stripping actually happens here — confirm
    # that __ErrorInfo tolerates it.
    if _filename == '-':
        lines = codecs.StreamReaderWriter(sys.stdin,
                                          codecs.getreader('utf8'),
                                          codecs.getwriter('utf8'),
                                          'replace').readlines()
    else:
        # BUG FIX: close the handle; the original leaked it.
        log_file = codecs.open(_filename, 'rb', 'utf8', 'replace')
        try:
            lines = log_file.readlines()
        finally:
            log_file.close()
    for line in lines:
        info = __ErrorInfo(line)
        # Skip lines that could not be attributed to a file.
        if not info.filename:
            continue
        error_dic_list.setdefault(info.filename, []).append(info)
    return error_dic_list
def read_with_encoding(self, filename, document, codec_info, encoding):
    """Fetch the include target (with caching) and return its dedented lines.

    On failure a one-element list containing a docutils warning is
    returned instead of raising.
    """
    global cache
    stream = None
    url = self.arguments[0]
    try:
        if url in cache:
            lines = cache[url]
        else:
            stream = codecs.StreamReaderWriter(
                urllib.request.urlopen(url),
                codec_info[2], codec_info[3], 'strict')
            lines = stream.readlines()
            cache[url] = lines
        return dedent_lines(lines, self.options.get('dedent'))
    except (IOError, OSError, urllib.error.URLError):
        return [
            document.reporter.warning(
                'Include file %r not found or reading it failed' % url,
                line=self.lineno)
        ]
    except UnicodeError:
        return [
            document.reporter.warning(
                'Encoding %r used for reading included file %r seems to '
                'be wrong, try giving an :encoding: option'
                % (encoding, url))
        ]
    finally:
        # Close the network stream even on the warning paths.
        if stream is not None:
            stream.close()
def open(self):
    """Open the log file and wrap it in an encoding-aware stream.

    Stores the resulting stream on ``self.stream`` and returns it.
    """
    mode = self.mode
    # Force binary mode so the writer controls all encoding itself.
    if 'b' not in mode:
        mode = mode + 'b'
    # We want to make sure that both unicode and str objects can be output
    # to the logger without the client code having to care about the
    # situation.
    #
    # The problem with the object returned from codecs.open(), is that it
    # assumes that whatever is given to write() is *already* in the
    # encoding specified as the parameter. This works most of the time,
    # but breaks down when we pass a string (byte seq) with øæå in latin-1
    # to a stream that assumes the input is in UTF-8.
    #
    # This is a slight variation of what codecs.open() does (and python's
    # logging module uses codecs.open() to enable various encodings for
    # the logs on file)
    stream = file(self.baseFilename, mode)  # Python 2 `file` builtin
    encoder, decoder, reader, writer = codecs.lookup(self.encoding)
    # The codec's own writer class is deliberately replaced with
    # CerelogStreamWriter; it is handed the codec's encoder below so it
    # can encode on demand rather than assuming pre-encoded input.
    srw = codecs.StreamReaderWriter(stream, reader, CerelogStreamWriter)
    srw.encoding = self.encoding
    srw.writer.encoding = srw.encoding
    srw.writer.encode = encoder
    self.stream = srw
    return self.stream
def VerifyUcs2Data(FileIn, FileName, Encoding): Ucs2Info = codecs.lookup('ucs-2') # # Convert to unicode # try: FileDecoded = codecs.decode(FileIn, Encoding) Ucs2Info.encode(FileDecoded) except: UniFile = BytesIO(FileIn) Info = codecs.lookup(Encoding) (Reader, Writer) = (Info.streamreader, Info.streamwriter) File = codecs.StreamReaderWriter(UniFile, Reader, Writer) LineNumber = 0 ErrMsg = lambda Encoding, LineNumber: \ '%s contains invalid %s characters on line %d.' % \ (FileName, Encoding, LineNumber) while True: LineNumber = LineNumber + 1 try: Line = File.readline() if Line == '': EdkLogger.error('Unicode File Parser', PARSER_ERROR, ErrMsg(Encoding, LineNumber)) Ucs2Info.encode(Line) except: EdkLogger.error('Unicode File Parser', PARSER_ERROR, ErrMsg('UCS-2', LineNumber))
def OpenUniFile(FileName):
    """Open a .uni string file and return a decoding stream reader for it.

    Detects a UTF-16 byte-order mark at the start of the file (defaulting
    to UTF-8 when none is present), validates that the contents are
    representable in UCS-2, and returns a codecs.StreamReaderWriter that
    decodes the raw bytes with the detected encoding.
    """
    #
    # Read file
    #
    try:
        with open(FileName, mode='rb') as UniFile:
            FileIn = UniFile.read()
    except Exception:
        # BUG FIX: the original passed the undefined name 'File' here,
        # which raised a NameError instead of reporting the open failure.
        # NOTE(review): assumes EdkLogger.Error aborts; if it returns,
        # FileIn is unbound below — confirm.
        EdkLogger.Error("build", FILE_OPEN_FAILURE, ExtraData=FileName)
    #
    # Detect Byte Order Mark at beginning of file. Default to UTF-8
    #
    Encoding = 'utf-8'
    if FileIn.startswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
        Encoding = 'utf-16'
    UniFileClassObject.VerifyUcs2Data(FileIn, FileName, Encoding)
    UniFile = BytesIO(FileIn)
    Info = codecs.lookup(Encoding)
    (Reader, Writer) = (Info.streamreader, Info.streamwriter)
    return codecs.StreamReaderWriter(UniFile, Reader, Writer)
def main():
    """Main entry function."""
    if len(sys.argv) < 3:
        print('Usage: <project-name> <filetype> <list-of-path to traverse>')
        print('\tfiletype can be python/cpp/all')
        exit(-1)
    _HELPER.project_name = sys.argv[1]
    file_type = sys.argv[2]
    allow_type = set()
    if file_type in ('python', 'all'):
        allow_type.update(PYTHON_SUFFIX)
    if file_type in ('cpp', 'all'):
        allow_type.update(CXX_SUFFIX)
    # On Python 2 (non-Windows), wrap stderr so unicode diagnostics are
    # written out as UTF-8 instead of raising UnicodeEncodeError.
    if sys.version_info.major == 2 and os.name != 'nt':
        sys.stderr = codecs.StreamReaderWriter(sys.stderr,
                                               codecs.getreader('utf8'),
                                               codecs.getwriter('utf8'),
                                               'replace')
    for path in sys.argv[3:]:
        if os.path.isfile(path):
            process(path, allow_type)
        else:
            for root, _, files in os.walk(path):
                for name in files:
                    process(os.path.join(root, name), allow_type)
    nerr = _HELPER.print_summary(sys.stderr)
    sys.exit(nerr > 0)
def __init__(self, buffer):
    """Wrap *buffer* in a UTF-8 StreamReaderWriter.

    TextIOWrapper closes its underlying buffer on close *and* can't
    handle actual file objects (on python 2), so a StreamReaderWriter
    is used instead.
    """
    utf8_reader = codecs.getreader('utf-8')
    utf8_writer = codecs.getwriter('utf-8')
    self.buffer = codecs.StreamReaderWriter(buffer, utf8_reader, utf8_writer)
def read_with_encoding(self, filename, document, codec_info, encoding):
    """Read *filename* with the given codec and return its dedented lines.

    Failures are reported as a one-element list containing a docutils
    warning instead of being raised.
    """
    stream = None
    try:
        stream = codecs.StreamReaderWriter(
            open(filename, 'rb'), codec_info[2], codec_info[3], 'strict')
        content = stream.readlines()
        return dedent_lines(content, self.options.get('dedent'))
    except (IOError, OSError):
        return [
            document.reporter.warning(
                'Include file %r not found or reading it failed' % filename,
                line=self.lineno)
        ]
    except UnicodeError:
        return [
            document.reporter.warning(
                'Encoding %r used for reading included file %r seems to '
                'be wrong, try giving an :encoding: option'
                % (encoding, filename))
        ]
    finally:
        # Ensure the file handle is released on every path.
        if stream is not None:
            stream.close()
def test_code_changed(self):
    """Verify _PollingReloader detects mtime changes of watched files."""
    codec = codecs.lookup('utf8')
    # Two watched files, written through UTF-8 codec wrappers.
    with tempfile.NamedTemporaryFile('wb') as tmp_file1, tempfile.NamedTemporaryFile('wb') as tmp_file2, \
            codecs.StreamReaderWriter(tmp_file1, codec.streamreader, codec.streamwriter, 'strict') as file1, \
            codecs.StreamReaderWriter(tmp_file2, codec.streamreader, codec.streamwriter, 'strict') as file2:
        reloader = _PollingReloader('example_service.standalone', ['pysoa'])
        file1.write('test 1')
        file1.flush()
        file2.write('test 2')
        file2.flush()
        # noinspection PyUnresolvedReferences
        with mock.patch.object(target=reloader, attribute='get_watch_file_names'
                               ) as mock_get_watch_file_names:
            mock_get_watch_file_names.return_value = [file1.name, file2.name]
            # First poll records baseline mtimes, so nothing has "changed".
            self.assertFalse(reloader.code_changed())
            # Sleep >1s so the rewrite lands on a different whole-second
            # mtime (filesystem timestamp granularity).
            time.sleep(1.1)
            file1.write('test changed 1')
            file1.flush()
            self.assertTrue(reloader.code_changed())
            # A second poll without further writes reports no change.
            self.assertFalse(reloader.code_changed())
            time.sleep(1.1)
            file2.write('test changed 2')
            file2.flush()
            self.assertTrue(reloader.code_changed())
            self.assertFalse(reloader.code_changed())
            time.sleep(1.1)
            file2.write('test changed 2 again')
            file2.flush()
            self.assertTrue(reloader.code_changed())
def __init__(
        self,
        host=PUDB_RDB_HOST,
        port=PUDB_RDB_PORT,
        port_search_limit=100,
        out=sys.stdout,
        term_size=None,
        reverse=False,
):
    """
    :arg term_size: A two-tuple ``(columns, rows)``, or *None*. If *None*,
        try to determine the terminal size automatically. Currently, this
        uses a heuristic: It uses the terminal size of the debuggee as
        that for the debugger. The idea is that you might be running both
        in two tabs of the same terminal window, hence using terminals of
        the same size.
    """
    self.out = out
    if term_size is None:
        try:
            # Query the debuggee's terminal (fd 1) for (rows, cols).
            s = struct.unpack(
                "hh", fcntl.ioctl(1, termios.TIOCGWINSZ, "1234"))
            term_size = (s[1], s[0])
        except Exception:
            # No usable tty; fall back to the classic 80x24.
            term_size = (80, 24)
    # Saved so the original stdio can be restored when the session ends.
    self._prev_handles = sys.stdin, sys.stdout
    self._client, (address, port) = self.get_client(
        host=host, port=port, search_limit=port_search_limit, reverse=reverse
    )
    self.remote_addr = ":".join(str(v) for v in address)
    self.say(SESSION_STARTED.format(self=self))
    # makefile ignores encoding if there's no buffering.
    raw_sock_file = self._client.makefile("rwb", 0)
    import codecs
    # Wrap the raw socket file so the UI reads/writes text (UTF-8).
    sock_file = codecs.StreamReaderWriter(
        raw_sock_file, codecs.getreader("utf-8"), codecs.getwriter("utf-8"))
    # Redirect global stdio into the socket for the debugger session.
    self._handle = sys.stdin = sys.stdout = sock_file
    # nc negotiation doesn't support telnet options
    if not reverse:
        import telnetlib as tn
        # Negotiate character-at-a-time mode with remote echo.
        raw_sock_file.write(tn.IAC + tn.WILL + tn.SGA)
        resp = raw_sock_file.read(3)
        assert resp == tn.IAC + tn.DO + tn.SGA
        raw_sock_file.write(tn.IAC + tn.WILL + tn.ECHO)
        resp = raw_sock_file.read(3)
        assert resp == tn.IAC + tn.DO + tn.ECHO
    Debugger.__init__(
        self, stdin=self._handle, stdout=self._handle, term_size=term_size
    )
def _temp_fixture_file_name_context(contents):
    """Yield the name of a temporary UTF-8 file holding *contents*.

    The file stays open (and thus alive) for the duration of the yield;
    closing the generator closes and removes it.
    """
    temp_file = tempfile.NamedTemporaryFile(mode='wb')
    codec = codecs.lookup('utf-8')
    wrapper = codecs.StreamReaderWriter(
        temp_file, codec.streamreader, codec.streamwriter, 'strict')
    with wrapper as writer:
        writer.write(contents)
        writer.flush()
        yield temp_file.name
def _wrap_stream_for_codec(f, encoding=None, errors='strict'):
    """Return *f* wrapped so reads and writes go through *encoding*.

    The encoding defaults to UTF-8, and the returned wrapper carries an
    ``encoding`` attribute like the stream ``codecs.open`` produces.
    """
    if encoding is None:
        encoding = 'utf-8'
    codec = codecs.lookup(encoding)
    wrapped = codecs.StreamReaderWriter(f, codec.streamreader,
                                        codec.streamwriter, errors)
    wrapped.encoding = encoding
    return wrapped
def write_csv(headers, data):
    """Serialize *headers* and *data* rows to a UTF-8 CSV string.

    Args:
      headers: Sequence of column names written as the first row.
      data: Iterable of row sequences.

    Returns:
      The CSV content accumulated in the underlying string buffer.
    """
    csv_data = cStringIO.StringIO()
    codecinfo = codecs.lookup("utf8")
    # The wrapper transparently encodes unicode cell values as UTF-8
    # before they reach the underlying byte buffer.
    wrapper = codecs.StreamReaderWriter(csv_data,
                                        codecinfo.streamreader,
                                        codecinfo.streamwriter)
    writer = csv.writer(wrapper)
    writer.writerow(headers)
    # BUG FIX: the original used map() purely for its side effect; under
    # Python 3 map() is lazy, so no data rows would ever be written.
    for row in data:
        writer.writerow(row)
    return csv_data.getvalue()
def matchFileContents(self, path):
    """Return True if ``self.pattern`` occurs in the file at *path*.

    The file is opened through ``self.view`` in binary mode and its
    contents are decoded as UTF-8 before matching.
    """
    with self.view.open(path, "rb") as binary:
        decoded = codecs.StreamReaderWriter(binary,
                                            codecs.getreader("utf-8"),
                                            codecs.getwriter("utf-8"))
        for text_line in decoded.readlines():
            if self.pattern in text_line:
                return True
    return False
def __init__(
        self,
        host=PUDB_RDB_HOST,
        port=PUDB_RDB_PORT,
        port_search_limit=100,
        out=sys.stdout,
        term_size=None,
        reverse=False,
):
    """Connect the debugger to a remote terminal over a socket.

    Redirects the process's stdin/stdout into the accepted socket so the
    debugger UI runs on the remote end.
    """
    self.active = True
    self.out = out
    # Saved so the original stdio can be restored when the session ends.
    self._prev_handles = sys.stdin, sys.stdout
    self._client, (address, port) = self.get_client(
        host=host, port=port, search_limit=port_search_limit,
        reverse=reverse)
    self.remote_addr = ":".join(str(v) for v in address)
    self.say(SESSION_STARTED.format(self=self))
    # makefile ignores encoding if there's no buffering.
    raw_sock_file = self._client.makefile("rwb", 0)
    import codecs
    if sys.version_info[0] < 3:
        # Python 2: recode between byte streams on both directions.
        sock_file = codecs.StreamRecoder(
            raw_sock_file,
            codecs.getencoder("utf-8"),
            codecs.getdecoder("utf-8"),
            codecs.getreader("utf-8"),
            codecs.getwriter("utf-8"),
        )
    else:
        # Python 3: expose the binary socket file as a text stream.
        sock_file = codecs.StreamReaderWriter(
            raw_sock_file,
            codecs.getreader("utf-8"),
            codecs.getwriter("utf-8"))
    self._handle = sys.stdin = sys.stdout = sock_file
    # nc negotiation doesn't support telnet options
    if not reverse:
        import telnetlib as tn
        # Negotiate character-at-a-time mode with remote echo.
        raw_sock_file.write(tn.IAC + tn.WILL + tn.SGA)
        resp = raw_sock_file.read(3)
        assert resp == tn.IAC + tn.DO + tn.SGA
        raw_sock_file.write(tn.IAC + tn.WILL + tn.ECHO)
        resp = raw_sock_file.read(3)
        assert resp == tn.IAC + tn.DO + tn.ECHO
    Debugger.__init__(self, stdin=self._handle, stdout=self._handle,
                      term_size=term_size)
def __init__(self, host=PUDB_RDB_HOST, port=PUDB_RDB_PORT,
             port_search_limit=100, out=sys.stdout, term_size=None):
    """Listen for a remote terminal connection and attach the debugger.

    Binds to the first available port at or after *port*, waits for a
    single client, then redirects stdin/stdout into that socket.
    """
    self.active = True
    self.out = out
    # Saved so the original stdio can be restored when the session ends.
    self._prev_handles = sys.stdin, sys.stdout
    self._sock, this_port = self.get_avail_port(host, port,
                                                port_search_limit)
    self._sock.setblocking(1)
    self._sock.listen(1)
    self.ident = '{0}:{1}'.format(self.me, this_port)
    self.host = host
    self.port = this_port
    self.say(BANNER.format(self=self))
    # Block until the remote terminal connects.
    self._client, address = self._sock.accept()
    self._client.setblocking(1)
    self.remote_addr = ':'.join(str(v) for v in address)
    self.say(SESSION_STARTED.format(self=self))
    # makefile ignores encoding if there's no buffering.
    raw_sock_file = self._client.makefile("rwb", 0)
    import codecs
    if sys.version_info[0] < 3:
        # Python 2: recode between byte streams on both directions.
        sock_file = codecs.StreamRecoder(raw_sock_file,
                                         codecs.getencoder("utf-8"),
                                         codecs.getdecoder("utf-8"),
                                         codecs.getreader("utf-8"),
                                         codecs.getwriter("utf-8"))
    else:
        # Python 3: expose the binary socket file as a text stream.
        sock_file = codecs.StreamReaderWriter(raw_sock_file,
                                              codecs.getreader("utf-8"),
                                              codecs.getwriter("utf-8"))
    self._handle = sys.stdin = sys.stdout = sock_file
    import telnetlib as tn
    # Negotiate character-at-a-time mode with remote echo.
    raw_sock_file.write(tn.IAC + tn.WILL + tn.SGA)
    resp = raw_sock_file.read(3)
    assert resp == tn.IAC + tn.DO + tn.SGA
    raw_sock_file.write(tn.IAC + tn.WILL + tn.ECHO)
    resp = raw_sock_file.read(3)
    assert resp == tn.IAC + tn.DO + tn.ECHO
    Debugger.__init__(self, stdin=self._handle, stdout=self._handle,
                      term_size=term_size)
def create_node(self, filename, rel_filename, lang):
    """Read the include target and narrow it by :pyobject:/:lines: options.

    Returns a one-element list with a docutils warning on any failure.
    NOTE(review): Python 2 syntax (`except ValueError, err`); the
    function body appears to continue beyond this chunk — the node
    construction from `lines` is not visible here.
    """
    document = self.state.document
    env = document.settings.env
    # Read the contents of the file to include
    encoding = self.options.get('encoding', env.config.source_encoding)
    codec_info = codecs.lookup(encoding)
    try:
        f = codecs.StreamReaderWriter(open(filename, 'rb'),
                                      codec_info[2], codec_info[3],
                                      'strict')
        lines = f.readlines()
        f.close()
    except (IOError, OSError):
        print_err('Failed to read %r' % filename)
        return [document.reporter.warning(
            'Include file %r not found or reading it failed' % filename,
            line=self.lineno)]
    except UnicodeError:
        print_err('Encoding %r used for reading included file %r seems to '
                  'be wrong, try giving an :encoding: option'
                  % (encoding, filename))
        return [document.reporter.warning(
            'Encoding %r used for reading included file %r seems to '
            'be wrong, try giving an :encoding: option'
            % (encoding, filename))]
    # Optionally narrow the include to a single Python object's lines.
    objectname = self.options.get('pyobject')
    if objectname is not None:
        from sphinx.pycode import ModuleAnalyzer
        analyzer = ModuleAnalyzer.for_file(filename, '')
        tags = analyzer.find_tags()
        if objectname not in tags:
            return [document.reporter.warning(
                'Object named %r not found in include file %r'
                % (objectname, filename), line=self.lineno)]
        else:
            # Tags map object name -> (kind, start_line, end_line),
            # 1-based inclusive; slice the matching region.
            lines = lines[tags[objectname][1]-1 : tags[objectname][2]-1]
    # Optionally narrow further by an explicit line specification.
    linespec = self.options.get('lines')
    if linespec is not None:
        try:
            linelist = parselinenos(linespec, len(lines))
        except ValueError, err:
            return [document.reporter.warning(str(err), line=self.lineno)]
        # just ignore nonexisting lines
        nlines = len(lines)
        lines = [lines[i] for i in linelist if i < nlines]
        if not lines:
            return [document.reporter.warning(
                'Line spec %r: no lines pulled from include file %r'
                % (linespec, filename), line=self.lineno)]
def _parseBuckOut(file_path):
    """Parse a buck rule-key log, transparently decompressing .gz files."""
    if file_path.endswith('.gz'):
        with gzip.open(file_path, 'rb') as raw_log:
            codec = codecs.lookup('utf-8')
            decoded = codecs.StreamReaderWriter(
                raw_log, codec.streamreader, codec.streamwriter)
            return RuleKeyStructureInfo._parseLogFile(decoded)
    with io.open(file_path, mode='r', encoding='utf-8') as buck_out:
        return RuleKeyStructureInfo._parseLogFile(buck_out)
def _parseBuckOut(file_path):
    """Parse a buck rule-key log; .gz inputs are decoded on the fly."""
    if not file_path.endswith(".gz"):
        with io.open(file_path, mode="r", encoding="utf-8") as buck_out:
            return RuleKeyStructureInfo._parseLogFile(buck_out)
    with gzip.open(file_path, "rb") as raw_log:
        info = codecs.lookup("utf-8")
        return RuleKeyStructureInfo._parseLogFile(
            codecs.StreamReaderWriter(raw_log, info.streamreader,
                                      info.streamwriter))
def open_file(path, mode='r', encoding='utf-8', **kwargs):
    """Yield a text-mode stream over FileSystems binary I/O.

    Only modes 'r' and 'w' are supported; the binary stream is wrapped
    in a codec because the Python 3 CSV package expects a text file.
    """
    if mode not in ('r', 'w'):
        raise ValueError('invalid mode: %s' % mode)
    info = codecs.lookup(encoding)
    open_binary = FileSystems.open if mode == 'r' else FileSystems.create
    with open_binary(path, **kwargs) as fp:
        yield codecs.StreamReaderWriter(fp, info.streamreader,
                                        info.streamwriter)
def uopen(fileName, encoding = 'ascii', mode = 'r'):
    """Open *fileName* through zopen and wrap it for the given encoding.

    Write/append modes get a stream writer, read mode a stream reader,
    and any other mode a combined reader/writer.
    """
    _, _, streamReader, streamWriter = codecs.lookup(encoding)
    fd = zopen(fileName, mode)
    if mode in ('w', 'a'):
        return streamWriter(fd)
    if mode == 'r':
        return streamReader(fd)
    return codecs.StreamReaderWriter(fd, streamReader, streamWriter)
def _unicode_open(file, encoding, errors='strict'):
    """Wrap an open binary *file* in a reader/writer for *encoding*.

    Handles both the legacy tuple result and the CodecInfo result of
    ``codecs.lookup``; the returned stream carries an ``encoding``
    attribute like the one ``codecs.open`` produces.
    """
    info = codecs.lookup(encoding)
    if isinstance(info, tuple):
        reader, writer = info[2], info[3]
    else:
        reader, writer = info.streamreader, info.streamwriter
    srw = codecs.StreamReaderWriter(file, reader, writer, errors)
    srw.encoding = encoding
    return srw
def __init__(self, enabled=True):
    """Prepare capture of sys.stdout into an in-memory buffer.

    The original stdout handle is remembered so it can be restored
    later; the captured text is collected into ``self.cap_stdout``.
    """
    self.enabled = enabled
    self.orig_stdout = sys.stdout
    sink = cStringIO()
    if six.PY2:
        # On Python 2 a plain StringIO chokes on UTF-8 byte strings;
        # wrap it in a codec-aware stream.
        # http://stackoverflow.com/questions/1817695/stringio-accept-utf8
        codecinfo = codecs.lookup('utf8')
        sink = codecs.StreamReaderWriter(sink,
                                         codecinfo.streamreader,
                                         codecinfo.streamwriter)
    self.cap_stdout = sink
    self.text = None
def __init__(self, name, filters, tpl, ctx, version_filename=False):
    """Initialize the patch buffer and its UTF-8 stream wrapper."""
    buf = cStringIO.StringIO()
    codec = codecs.lookup("utf-8")
    self._buffer = buf
    self._codecinfo = codec
    self._wrapper = codecs.StreamReaderWriter(
        buf, codec.streamreader, codec.streamwriter)
    self.name = name
    self.filters = filters
    self.tpl = tpl
    self.ctx = ctx
    self.version_filename = version_filename
    # Tracks whether anything has been written into the buffer yet.
    self.modified = False
def apply_htmlparser(html, maxcol=MAXCOL, codec='utf8'):
    """This function extracts from the HTML string by passing it through a
    htmllib.HTMLParser instance (slightly modified for Unicode support).
    Adapted from
    http://www.bazza.com/~eaganj/weblog/2006/04/04/printing-html-as-text-in-python-with-unicode/

    @type html: unicode
    @param html: The HTML to extract text from
        (eg. u"<html><body><h1>Hello</h1>...")
    @type maxcol: int
    @param maxcol: The maxcol value to passed to formatter.DumbWriter()
    @type codec: str (passed to codecs.lookup())
    @param codec: The codec to use to parse the HTML.
    @rtype : str
    @return: The text parsed from the HTML."""
    class UnicodeHTMLParser(htmllib.HTMLParser):
        """HTMLParser that can handle unicode charrefs"""
        # Map entity names directly to unicode characters instead of the
        # latin-1 byte strings the base class would use.
        entitydefs = dict([(k, unichr(v))
                           for k, v in htmlentitydefs.name2codepoint.items()])

        def handle_charref(self, name):
            """Override builtin version to return unicode instead of
            binary strings for 8-bit chars."""
            try:
                n = int(name)
            except ValueError:
                self.unknown_charref(name)
                return
            if not 0 <= n <= 255:
                self.unknown_charref(name)
                return
            if 0 <= n <= 127:
                self.handle_data(chr(n))
            else:
                self.handle_data(unichr(n))

    # Write formatted output through a codec wrapper so unicode data is
    # stored encoded in the underlying StringIO.
    sio = StringIO()
    encoder, decoder, reader, writer = codecs.lookup(codec)
    codecio = codecs.StreamReaderWriter(sio, reader, writer, 'replace')
    writer = formatter.DumbWriter(codecio, maxcol)
    prettifier = formatter.AbstractFormatter(writer)
    parser = UnicodeHTMLParser(prettifier)
    parser.feed(html)
    parser.close()
    # Rewind and decode everything that was written.
    codecio.seek(0)
    result = codecio.read()
    sio.close()
    codecio.close()
    return result
def uopen(fileName, encoding='utf-8', mode='r'):
    """Open *fileName* through zopen and wrap it for *encoding*.

    Standard streams returned by zopen are passed through unwrapped;
    otherwise the wrapper matches the requested mode.
    """
    _, _, streamReader, streamWriter = codecs.lookup(encoding)
    fd = zopen(fileName, mode)
    # zopen may hand back one of the process's standard streams; leave
    # those untouched.
    if fd in (sys.stdout, sys.stdin, sys.stderr):
        return fd
    if mode in ('w', 'a'):
        return streamWriter(fd)
    if mode == 'r':
        return streamReader(fd)
    return codecs.StreamReaderWriter(fd, streamReader, streamWriter)
def ConvertHtmlToText(strHTML):
    ''' strHTML should passed as utf8 text; returns plain text with
    CRLF line endings.'''
    class Formatter(formatter.AbstractFormatter):
        # Variant of AbstractFormatter.add_line_break that resets the
        # label/paragraph-skip state before emitting the break.
        def add_line_break(self):
            if not (self.hard_break or self.para_end):
                self.have_label = self.parskip = 0
                self.writer.send_line_break()
            self.hard_break = self.nospace = 1
            self.softspace = 0

    class HTML2TextParser(htmllib.HTMLParser):
        # entities should be encoded as utf8
        entitydefs = dict([(k, unichr(v).encode('utf_8'))
                           for k, v in htmlentitydefs.name2codepoint.items()])

        # Suppress hyperlink footnote output for anchors.
        def anchor_end(self):
            if self.anchor:
                self.anchor = None

        def convert_charref(self, name):
            # Validate the numeric reference first; invalid ones are
            # silently dropped.
            try:
                n = unichr(int(name))
            except ValueError:
                return
            return self.convert_codepoint(int(name))

        def convert_codepoint(self, codepoint):
            # codepoint should also be encoded as utf8
            return unichr(codepoint).encode('utf_8')

        def handle_image(self, src, alt, *args):
            # ignore images
            pass

    # cStringIO for output string stream
    sio = cStringIO.StringIO()
    encoder, decoder, reader, writer = codecs.lookup('utf_8')
    utf8io = codecs.StreamReaderWriter(sio, reader, writer, 'replace')
    # NOTE(review): the DumbWriter writes utf8-encoded byte strings
    # straight to sio; utf8io is only used below to decode them back
    # when reading — confirm this asymmetry is intentional.
    writer = formatter.DumbWriter(sio)
    prettifier = Formatter(writer)
    parser = HTML2TextParser(prettifier)
    # Parse HTML to plain text
    parser.feed(strHTML)
    parser.close()
    utf8io.seek(0)
    result = utf8io.read()
    sio.close()
    utf8io.close()
    return result.replace('\n', '\r\n')
def _open(self):
    """Opens the log file without handle inheritance but with file
    sharing.

    Ignores self.mode.
    """
    stream = shared_open(self.baseFilename)
    if not self.encoding:
        return stream
    # Do the equivalent of
    # codecs.open(self.baseFilename, self.mode, self.encoding)
    codec = codecs.lookup(self.encoding)
    wrapped = codecs.StreamReaderWriter(stream, codec.streamreader,
                                        codec.streamwriter, 'replace')
    wrapped.encoding = self.encoding
    return wrapped
def use_codec(open_file, encoding=None, errors='strict'):
    """
    This is the same as "codecs.open()" but it uses an already open
    file instead of a file path to open.
    """
    if encoding is None:
        return open_file
    codec = codecs.lookup(encoding)
    wrapped = codecs.StreamReaderWriter(open_file, codec.streamreader,
                                        codec.streamwriter, errors)
    # Mirror codecs.open(): expose the encoding for introspection.
    wrapped.encoding = encoding
    return wrapped