def read_w(self, space, w_size=None):
    """Read text from the wrapper and return it as a new utf8 object.

    ``w_size`` is converted with ``convert_size``.  When the converted
    size is negative, everything the underlying buffer still yields is
    decoded and returned together with the characters already decoded;
    otherwise at most that many characters are returned.

    Raises an ``IOError`` ("not readable") when no decoder is set.
    """
    self._check_attached(space)
    self._check_closed(space)
    if not self.w_decoder:
        raise oefmt(space.w_IOError, "not readable")

    wanted = convert_size(space, w_size)
    self._writeflush(space)

    if wanted >= 0:
        # Bounded read: gather decoded chunks until `wanted` characters
        # have been collected or no more data can be produced.
        out = Utf8StringBuilder(wanted)
        left = wanted
        while left > 0:
            if not self._ensure_data(space):
                break
            chunk, chunk_len = self.decoded.get_chars(left)
            out.append_utf8(chunk, chunk_len)
            left -= chunk_len
        return space.newutf8(out.build(), out.getlength())

    # Unbounded read: decode the rest of the buffer in one final call
    # (last argument w_True) and put the already-decoded characters in
    # front of it.
    w_raw = space.call_method(self.w_buffer, "read")
    w_tail = space.call_method(self.w_decoder, "decode", w_raw,
                               space.w_True)
    check_decoded(space, w_tail)
    pending, pending_len = self.decoded.get_chars(-1)
    w_head = space.newutf8(pending, pending_len)
    w_everything = space.add(w_head, w_tail)
    # Nothing is left pending once everything has been handed out.
    self.decoded.reset()
    self.snapshot = None
    return w_everything
def writerow(self, w_fields):
    """Construct and write a CSV record from a sequence of fields.
    Non-string elements will be converted to string."""
    space = self.space
    fields_w = space.listview(w_fields)
    dialect = self.dialect
    # Builder for the record text (initial size hint of 80).
    rec = Utf8StringBuilder(80)
    #
    for field_index in range(len(fields_w)):
        w_field = fields_w[field_index]
        # Turn the field into (utf8 text, length): None becomes the empty
        # string, floats go through repr(), everything else through str().
        if space.is_w(w_field, space.w_None):
            field = ""
            length = 0
        elif space.isinstance_w(w_field, space.w_float):
            field, length = space.utf8_len_w(space.repr(w_field))
        else:
            field, length = space.utf8_len_w(space.str(w_field))
        #
        # Decide whether this field has to be quoted.
        if dialect.quoting == QUOTE_NONNUMERIC:
            # Quote exactly those fields that space.float_w() rejects.
            try:
                space.float_w(w_field)    # is it an int/long/float?
                quoted = False
            except OperationError as e:
                # Errors for which e.async(space) is true are propagated
                # instead of being read as "not a number".
                if e.async(space):
                    raise
                quoted = True
        elif dialect.quoting == QUOTE_ALL:
            quoted = True
        # NOTE(review): the visible excerpt ends here; the handling of the
        # remaining quoting modes and the use of `rec`, `field`, `length`
        # and `quoted` are not shown.
def _readline(self, space, limit):
    """Collect one line from the decoded data, refilling it as needed.

    ``limit`` caps the number of characters gathered when it is
    non-negative; otherwise -1 is handed to the line-ending scanner.

    Returns a ``(utf8_string, length)`` pair taken from the builder.
    """
    # This is a separate function so that readline_w() can be jitted.
    carry = None        # unconsumed tail of the previous chunk, if any
    carry_ulen = -1
    line = Utf8StringBuilder()
    while True:
        # First, get some data if necessary
        if not self._ensure_data(space):
            # End of file: a carried-over tail ends the line.
            if carry:
                line.append_utf8(carry, carry_ulen)
            break

        if carry:
            assert not self.readtranslate and self.readnl == '\r\n'
            assert self.decoded.pos == 0
            if carry == '\r' and self.decoded.text[0] == '\n':
                # The '\r\n' terminator was split across two chunks.
                line.append_utf8('\r\n', 2)
                self.decoded.pos = 1
                self.decoded.upos = 1
                carry = None
                carry_ulen = -1
                break
            # The carried text is ordinary line content; emit it and scan
            # the fresh chunk on the next iteration.
            line.append_utf8(carry, carry_ulen)
            carry = None
            carry_ulen = -1
            continue

        if limit < 0:
            budget = -1
        else:
            budget = limit - line.getlength()
            assert budget >= 0

        # Record the positions before and after the scan so that exactly
        # the scanned slice gets appended.
        pos_before = self.decoded.pos
        upos_before = self.decoded.upos
        assert pos_before >= 0
        hit = self._scan_line_ending(budget)
        pos_after = self.decoded.pos
        upos_after = self.decoded.upos
        if pos_after > pos_before:
            line.append_utf8_slice(self.decoded.text, pos_before, pos_after,
                                   upos_after - upos_before)

        if hit:
            break
        if limit >= 0 and line.getlength() >= limit:
            break

        # There may be some remaining chars we'll have to prepend to the
        # next chunk of data
        if not self.decoded.exhausted():
            carry, carry_ulen = self.decoded.get_chars(-1)
        # We have consumed the buffer
        self.decoded.reset()

    return (line.build(), line.getlength())
def _read(self, space, size):
    """Return up to ``size`` characters as a new utf8 object.

    Fewer characters are returned when ``_ensure_data`` reports that no
    more data is available.
    """
    out = Utf8StringBuilder(size)
    left = size
    # Keep pulling decoded chunks until the request is satisfied or the
    # data runs out.
    while left > 0 and self._ensure_data(space):
        chunk, count = self.decoded.get_chars(left)
        out.append_utf8(chunk, count)
        left -= count
    return space.newutf8(out.build(), out.getlength())
def build(self, space, r, stop):
    """Build a utf8 object from the first ``stop`` items of ``r``.

    Each item ``r[i]`` is added with ``append_code``; the result is
    wrapped with ``space.newutf8`` using ``stop`` as its length.
    """
    # Size hint of three bytes per item.
    out = Utf8StringBuilder(stop * 3)
    index = 0
    while index < stop:
        out.append_code(r[index])
        index += 1
    return space.newutf8(out.build(), stop)
def __init__(self, space):
    """Store ``space`` and start with a fresh ``Utf8StringBuilder``."""
    self.space = space
    fresh_builder = Utf8StringBuilder()
    self.builder = fresh_builder
def __init__(self, space, size):
    """Create the internal builder, sized by ``size`` when non-negative.

    A negative ``size`` creates the builder without a size argument.
    ``space`` is accepted but not used here.
    """
    if size >= 0:
        builder = Utf8StringBuilder(size)
    else:
        builder = Utf8StringBuilder()
    self.builder = builder
def next_w(self):
    """Parse and return the next record as a new list of fields.

    Lines are pulled from ``self.w_iter`` and fed character by character
    through a state machine until one record is complete; a record may
    span several input lines (quoted fields, escaped line ends).

    Raises ``self.error(...)`` when a line is a bytes object, contains a
    NUL character, has an unexpected character after a closing quote in
    strict mode, has text after a line end inside one line, or when the
    input ends inside an unfinished field in strict mode.  The
    StopIteration from the underlying iterator is re-raised when it
    arrives with nothing to flush.
    """
    space = self.space
    dialect = self.dialect
    # Fresh field list, pre-sized from the previous record's field count.
    self.fields_w = objectmodel.newlist_hint(self.sizehint)
    self.numeric_field = False
    field_builder = None  # valid iff state not in [START_RECORD, EAT_CRNL]
    state = START_RECORD
    #
    while True:
        try:
            w_line = space.next(self.w_iter)
        except OperationError as e:
            if e.match(space, space.w_StopIteration):
                # Input ended: flush a field that was started (non-empty,
                # or still inside quotes) unless the dialect is strict.
                if (field_builder is not None and
                        state != START_RECORD and state != EAT_CRNL and
                        (len(field_builder.build()) > 0 or
                         state == IN_QUOTED_FIELD)):
                    if dialect.strict:
                        raise self.error(u"unexpected end of data")
                    else:
                        self.save_field(field_builder)
                        break
            # Anything else (including a plain StopIteration) propagates.
            raise
        self.line_num += 1
        if space.isinstance_w(w_line, space.w_bytes):
            # NOTE(review): the message below lacks its closing ')' --
            # confirm nothing depends on the exact text before fixing it.
            raise self.error(u"iterator should return strings, not bytes "
                             u"(did you open the file in text mode?")
        line = space.utf8_w(w_line)
        # `c` is compared against ord(...) values, i.e. it is an integer
        # code point.
        for c in Utf8StringIterator(line):
            if c == 0:
                raise self.error(u"line contains NULL byte")

            if state == START_RECORD:
                if c == ord(u'\n') or c == ord(u'\r'):
                    state = EAT_CRNL
                    continue
                # normal character - handle as START_FIELD
                state = START_FIELD
                # fall-through to the next case

            # Deliberately `if`, not `elif`: START_RECORD falls through.
            if state == START_FIELD:
                field_builder = Utf8StringBuilder(64)
                # expecting field
                if c == ord(u'\n') or c == ord(u'\r'):
                    # save empty field
                    self.save_field(field_builder)
                    state = EAT_CRNL
                elif (c == dialect.quotechar and
                          dialect.quoting != QUOTE_NONE):
                    # start quoted field
                    state = IN_QUOTED_FIELD
                elif c == dialect.escapechar:
                    # possible escaped character
                    state = ESCAPED_CHAR
                elif c == ord(u' ') and dialect.skipinitialspace:
                    # ignore space at start of field
                    pass
                elif c == dialect.delimiter:
                    # save empty field
                    self.save_field(field_builder)
                else:
                    # begin new unquoted field
                    if dialect.quoting == QUOTE_NONNUMERIC:
                        self.numeric_field = True
                    self.add_char(field_builder, c)
                    state = IN_FIELD

            elif state == ESCAPED_CHAR:
                # The escaped character is always kept; an escaped line
                # end additionally lets the record continue on the next
                # input line (see AFTER_ESCAPED_CRNL below).
                if c == ord(u'\n') or c == ord(u'\r'):
                    self.add_char(field_builder, c)
                    state = AFTER_ESCAPED_CRNL
                else:
                    self.add_char(field_builder, c)
                    state = IN_FIELD

            elif state == IN_FIELD or state == AFTER_ESCAPED_CRNL:
                # in unquoted field
                if c == ord(u'\n') or c == ord(u'\r'):
                    # end of line
                    self.save_field(field_builder)
                    state = EAT_CRNL
                elif c == dialect.escapechar:
                    # possible escaped character
                    state = ESCAPED_CHAR
                elif c == dialect.delimiter:
                    # save field - wait for new field
                    self.save_field(field_builder)
                    state = START_FIELD
                else:
                    # normal character - save in field
                    self.add_char(field_builder, c)

            elif state == IN_QUOTED_FIELD:
                # in quoted field
                if c == dialect.escapechar:
                    # Possible escape character
                    state = ESCAPE_IN_QUOTED_FIELD
                elif (c == dialect.quotechar and
                          dialect.quoting != QUOTE_NONE):
                    if dialect.doublequote:
                        # doublequote; " represented by ""
                        state = QUOTE_IN_QUOTED_FIELD
                    else:
                        # end of quote part of field
                        state = IN_FIELD
                else:
                    # normal character - save in field
                    self.add_char(field_builder, c)

            elif state == ESCAPE_IN_QUOTED_FIELD:
                self.add_char(field_builder, c)
                state = IN_QUOTED_FIELD

            elif state == QUOTE_IN_QUOTED_FIELD:
                # doublequote - seen a quote in an quoted field
                if (dialect.quoting != QUOTE_NONE and
                        c == dialect.quotechar):
                    # save "" as "
                    self.add_char(field_builder, c)
                    state = IN_QUOTED_FIELD
                elif c == dialect.delimiter:
                    # save field - wait for new field
                    self.save_field(field_builder)
                    state = START_FIELD
                elif c == ord(u'\n') or c == ord(u'\r'):
                    # end of line
                    self.save_field(field_builder)
                    state = EAT_CRNL
                elif not dialect.strict:
                    # lenient mode: keep the character and carry on as an
                    # unquoted field
                    self.add_char(field_builder, c)
                    state = IN_FIELD
                else:
                    # illegal
                    raise self.error(u"'%s' expected after '%s'" % (
                        unichr(dialect.delimiter), unichr(dialect.quotechar)))

            elif state == EAT_CRNL:
                # Only further line-end characters may follow a line end
                # within the same input line.
                if not (c == ord(u'\n') or c == ord(u'\r')):
                    raise self.error(
                        u"new-line character seen in unquoted "
                        u"field - do you need to open the file "
                        u"in universal-newline mode?")

        # End of this input line: either the record is complete (break)
        # or the next line has to be read to finish it.
        if state == IN_FIELD or state == QUOTE_IN_QUOTED_FIELD:
            self.save_field(field_builder)
            break
        elif state == ESCAPED_CHAR:
            # The line ended right after the escape character: a newline
            # is added as the escaped character and parsing goes on.
            self.add_char(field_builder, ord(u'\n'))
            state = IN_FIELD
        elif state == IN_QUOTED_FIELD:
            # Still inside quotes: the field continues on the next line.
            pass
        elif state == ESCAPE_IN_QUOTED_FIELD:
            self.add_char(field_builder, ord(u'\n'))
            state = IN_QUOTED_FIELD
        elif state == START_FIELD:
            # save empty field
            field_builder = Utf8StringBuilder()
            self.save_field(field_builder)
            break
        elif state == AFTER_ESCAPED_CRNL:
            continue
        else:
            break
    #
    w_result = space.newlist(self.fields_w)
    # assume all lines have the same number of fields
    self.sizehint = len(self.fields_w)
    self.fields_w = None
    return w_result