Exemple #1
0
    def read_w(self, space, w_size=None):
        # Read decoded text from the stream.
        #
        # w_size is turned into an integer by convert_size(); a negative
        # result means "read everything that is left", otherwise at most
        # that many characters are returned.
        #
        # Raises IOError("not readable") when no decoder is configured.
        self._check_attached(space)
        self._check_closed(space)
        if not self.w_decoder:
            raise oefmt(space.w_IOError, "not readable")

        wanted = convert_size(space, w_size)
        self._writeflush(space)

        if wanted >= 0:
            # Bounded read: pull chunks out of the decoded buffer until the
            # request is satisfied or _ensure_data() reports no more data.
            pending = wanted
            out = Utf8StringBuilder(wanted)
            while pending > 0 and self._ensure_data(space):
                chunk, chunk_len = self.decoded.get_chars(pending)
                out.append_utf8(chunk, chunk_len)
                pending -= chunk_len
            return space.newutf8(out.build(), out.getlength())

        # Unbounded read: decode the rest of the underlying buffer in one
        # final decode() call (second argument True) and prepend whatever is
        # still sitting in the decoded buffer.
        w_raw = space.call_method(self.w_buffer, "read")
        w_tail = space.call_method(self.w_decoder, "decode", w_raw,
                                   space.w_True)
        check_decoded(space, w_tail)
        buffered, buffered_len = self.decoded.get_chars(-1)
        w_head = space.newutf8(buffered, buffered_len)
        w_text = space.add(w_head, w_tail)
        # The decoded buffer has been consumed; drop it and the snapshot.
        self.decoded.reset()
        self.snapshot = None
        return w_text
Exemple #2
0
 def writerow(self, w_fields):
     """Construct and write a CSV record from a sequence of fields.
     Non-string elements will be converted to string."""
     space = self.space
     fields_w = space.listview(w_fields)
     dialect = self.dialect
     # Builder for the output record, created with an initial size of 80.
     rec = Utf8StringBuilder(80)
     #
     for field_index in range(len(fields_w)):
         w_field = fields_w[field_index]
         # Step 1: turn the field into (utf8 text, length).
         if space.is_w(w_field, space.w_None):
             # None becomes an empty field.
             field = ""
             length = 0
         elif space.isinstance_w(w_field, space.w_float):
             # Floats are rendered with repr() rather than str().
             field, length = space.utf8_len_w(space.repr(w_field))
         else:
             field, length = space.utf8_len_w(space.str(w_field))
         #
         # Step 2: decide whether the field must be quoted.
         if dialect.quoting == QUOTE_NONNUMERIC:
             # Quote exactly those fields that space.float_w() rejects.
             try:
                 space.float_w(w_field)    # is it an int/long/float?
                 quoted = False
             except OperationError as e:
                 # NOTE(review): presumably lets asynchronous errors
                 # propagate instead of treating them as "not a number"
                 # -- confirm against OperationError.async().
                 if e.async(space):
                     raise
                 quoted = True
         elif dialect.quoting == QUOTE_ALL:
             quoted = True
Exemple #3
0
    def _readline(self, space, limit):
        # This is a separate function so that readline_w() can be jitted.
        #
        # Accumulates one line out of self.decoded, refilling it through
        # self._ensure_data(space) as needed, and returns the pair
        # (builder.build(), builder.getlength()).
        #
        # limit: when >= 0, accumulation stops once the builder's length
        # reaches it; a negative value means "no limit".
        #
        # remnant / remnant_ulen hold text left over from the previous chunk
        # that did not complete a line ending (None / -1 when there is none).
        remnant = None
        remnant_ulen = -1
        builder = Utf8StringBuilder()
        while True:
            # First, get some data if necessary
            has_data = self._ensure_data(space)
            if not has_data:
                # end of file
                if remnant:
                    # Whatever was carried over is the tail of the last line.
                    builder.append_utf8(remnant, remnant_ulen)
                break

            if remnant:
                # A remnant is only expected when the line ending is the
                # two-character '\r\n' and no newline translation is active.
                assert not self.readtranslate and self.readnl == '\r\n'
                assert self.decoded.pos == 0
                if remnant == '\r' and self.decoded.text[0] == '\n':
                    # The '\r\n' pair was split across two chunks: emit it
                    # whole and step past the '\n' in the new chunk.
                    builder.append_utf8('\r\n', 2)
                    self.decoded.pos = 1
                    self.decoded.upos = 1
                    remnant = None
                    remnant_ulen = -1
                    break
                else:
                    # The remnant was ordinary text; keep it and rescan from
                    # the top of the loop with the fresh chunk.
                    builder.append_utf8(remnant, remnant_ulen)
                    remnant = None
                    remnant_ulen = -1
                    continue

            if limit >= 0:
                remaining = limit - builder.getlength()
                assert remaining >= 0
            else:
                remaining = -1
            # Remember where the scan starts (pos and upos are tracked
            # separately) so the scanned span can be copied afterwards.
            start = self.decoded.pos
            ustart = self.decoded.upos
            assert start >= 0
            found = self._scan_line_ending(remaining)
            end_scan = self.decoded.pos
            uend_scan = self.decoded.upos
            if end_scan > start:
                builder.append_utf8_slice(self.decoded.text, start, end_scan,
                                          uend_scan - ustart)

            # Done when a line ending was found or the limit is reached.
            if found or (limit >= 0 and builder.getlength() >= limit):
                break

            # There may be some remaining chars we'll have to prepend to the
            # next chunk of data
            if not self.decoded.exhausted():
                remnant, remnant_ulen = self.decoded.get_chars(-1)
            # We have consumed the buffer
            self.decoded.reset()

        result = builder.build()
        lgt = builder.getlength()
        return (result, lgt)
Exemple #4
0
    def _read(self, space, size):
        """Return up to ``size`` characters of decoded text.

        Chunks are taken from ``self.decoded`` until the request is
        satisfied or ``self._ensure_data(space)`` reports that no more data
        is available.  The result is wrapped with ``space.newutf8``.
        """
        out = Utf8StringBuilder(size)
        pending = size

        # Each pass asks for what is still missing; get_chars() may hand
        # back fewer characters than requested.
        while pending > 0 and self._ensure_data(space):
            chunk, chunk_len = self.decoded.get_chars(pending)
            out.append_utf8(chunk, chunk_len)
            pending -= chunk_len

        return space.newutf8(out.build(), out.getlength())
Exemple #5
0
 def build(self, space, r, stop):
     """Wrap the first ``stop`` items of ``r`` as a utf8 string object.

     Each ``r[i]`` is appended with ``append_code`` (presumably an integer
     code point -- confirm against the caller).  ``stop`` is passed to
     ``space.newutf8`` as the length of the result.
     """
     # The builder is created with three times ``stop`` as its initial size.
     out = Utf8StringBuilder(stop * 3)
     index = 0
     while index < stop:
         out.append_code(r[index])
         index += 1
     return space.newutf8(out.build(), stop)
Exemple #6
0
 def __init__(self, space):
     """Store ``space`` and start with an empty Utf8StringBuilder."""
     self.builder = Utf8StringBuilder()
     self.space = space
Exemple #7
0
 def __init__(self, space, size):
     """Create the builder, passing ``size`` along unless it is negative.

     ``space`` is accepted but not used here.
     """
     self.builder = (Utf8StringBuilder() if size < 0
                     else Utf8StringBuilder(size))
Exemple #8
0
    def next_w(self):
        # Parse and return the next CSV record as a list built with
        # space.newlist() from the fields collected in self.fields_w.
        #
        # Lines are pulled from self.w_iter and fed character by character
        # through a state machine; a record may span several input lines
        # (quoted fields and escaped line endings).
        #
        # Raises self.error(...) for malformed input (NUL character, bytes
        # instead of text, stray data after a line ending, and the strict
        # dialect checks).  When the iterator is exhausted, StopIteration
        # propagates unless a partial field is pending in non-strict mode,
        # in which case that field is saved and the record is returned.
        space = self.space
        dialect = self.dialect
        self.fields_w = objectmodel.newlist_hint(self.sizehint)
        self.numeric_field = False
        field_builder = None  # valid iff state not in [START_RECORD, EAT_CRNL]
        state = START_RECORD
        #
        while True:
            try:
                w_line = space.next(self.w_iter)
            except OperationError as e:
                if e.match(space, space.w_StopIteration):
                    # Input ended in the middle of a record: either complain
                    # (strict) or keep what has been collected so far.
                    if (field_builder is not None and state != START_RECORD
                            and state != EAT_CRNL
                            and (len(field_builder.build()) > 0
                                 or state == IN_QUOTED_FIELD)):
                        if dialect.strict:
                            raise self.error(u"unexpected end of data")
                        else:
                            self.save_field(field_builder)
                            break
                raise
            self.line_num += 1
            if space.isinstance_w(w_line, space.w_bytes):
                # The closing parenthesis of this message was missing.
                raise self.error(u"iterator should return strings, not bytes "
                                 u"(did you open the file in text mode?)")
            line = space.utf8_w(w_line)
            for c in Utf8StringIterator(line):
                if c == 0:
                    raise self.error(u"line contains NULL byte")

                if state == START_RECORD:
                    if c == ord(u'\n') or c == ord(u'\r'):
                        state = EAT_CRNL
                        continue
                    # normal character - handle as START_FIELD
                    state = START_FIELD
                    # fall-through to the next case

                if state == START_FIELD:
                    # A fresh builder is created for every character seen
                    # in this state.
                    field_builder = Utf8StringBuilder(64)
                    # expecting field
                    if c == ord(u'\n') or c == ord(u'\r'):
                        # save empty field
                        self.save_field(field_builder)
                        state = EAT_CRNL
                    elif (c == dialect.quotechar
                          and dialect.quoting != QUOTE_NONE):
                        # start quoted field
                        state = IN_QUOTED_FIELD
                    elif c == dialect.escapechar:
                        # possible escaped character
                        state = ESCAPED_CHAR
                    elif c == ord(u' ') and dialect.skipinitialspace:
                        # ignore space at start of field
                        pass
                    elif c == dialect.delimiter:
                        # save empty field
                        self.save_field(field_builder)
                    else:
                        # begin new unquoted field
                        if dialect.quoting == QUOTE_NONNUMERIC:
                            self.numeric_field = True
                        self.add_char(field_builder, c)
                        state = IN_FIELD

                elif state == ESCAPED_CHAR:
                    # The escaped character is always kept; an escaped line
                    # ending is tracked with its own state.
                    if c == ord(u'\n') or c == ord(u'\r'):
                        self.add_char(field_builder, c)
                        state = AFTER_ESCAPED_CRNL
                    else:
                        self.add_char(field_builder, c)
                        state = IN_FIELD

                elif state == IN_FIELD or state == AFTER_ESCAPED_CRNL:
                    # in unquoted field
                    if c == ord(u'\n') or c == ord(u'\r'):
                        # end of line
                        self.save_field(field_builder)
                        state = EAT_CRNL
                    elif c == dialect.escapechar:
                        # possible escaped character
                        state = ESCAPED_CHAR
                    elif c == dialect.delimiter:
                        # save field - wait for new field
                        self.save_field(field_builder)
                        state = START_FIELD
                    else:
                        # normal character - save in field
                        self.add_char(field_builder, c)

                elif state == IN_QUOTED_FIELD:
                    # in quoted field
                    if c == dialect.escapechar:
                        # Possible escape character
                        state = ESCAPE_IN_QUOTED_FIELD
                    elif (c == dialect.quotechar
                          and dialect.quoting != QUOTE_NONE):
                        if dialect.doublequote:
                            # doublequote; " represented by ""
                            state = QUOTE_IN_QUOTED_FIELD
                        else:
                            # end of quote part of field
                            state = IN_FIELD
                    else:
                        # normal character - save in field
                        self.add_char(field_builder, c)

                elif state == ESCAPE_IN_QUOTED_FIELD:
                    self.add_char(field_builder, c)
                    state = IN_QUOTED_FIELD

                elif state == QUOTE_IN_QUOTED_FIELD:
                    # doublequote - seen a quote in a quoted field
                    if (dialect.quoting != QUOTE_NONE
                            and c == dialect.quotechar):
                        # save "" as "
                        self.add_char(field_builder, c)
                        state = IN_QUOTED_FIELD
                    elif c == dialect.delimiter:
                        # save field - wait for new field
                        self.save_field(field_builder)
                        state = START_FIELD
                    elif c == ord(u'\n') or c == ord(u'\r'):
                        # end of line
                        self.save_field(field_builder)
                        state = EAT_CRNL
                    elif not dialect.strict:
                        self.add_char(field_builder, c)
                        state = IN_FIELD
                    else:
                        # illegal
                        raise self.error(u"'%s' expected after '%s'" % (unichr(
                            dialect.delimiter), unichr(dialect.quotechar)))

                elif state == EAT_CRNL:
                    # Only further line-ending characters may follow.
                    if not (c == ord(u'\n') or c == ord(u'\r')):
                        raise self.error(
                            u"new-line character seen in unquoted "
                            u"field - do you need to open the file "
                            u"in universal-newline mode?")

            # End of this input line: decide from the state whether the
            # record is complete (break) or continues on the next line.
            if state == IN_FIELD or state == QUOTE_IN_QUOTED_FIELD:
                self.save_field(field_builder)
                break
            elif state == ESCAPED_CHAR:
                # An escape at the very end of the line stands for '\n'.
                self.add_char(field_builder, ord(u'\n'))
                state = IN_FIELD
            elif state == IN_QUOTED_FIELD:
                # The quoted field continues on the next line.
                pass
            elif state == ESCAPE_IN_QUOTED_FIELD:
                self.add_char(field_builder, ord(u'\n'))
                state = IN_QUOTED_FIELD
            elif state == START_FIELD:
                # save empty field
                field_builder = Utf8StringBuilder()
                self.save_field(field_builder)
                break
            elif state == AFTER_ESCAPED_CRNL:
                continue
            else:
                break
        #
        w_result = space.newlist(self.fields_w)
        # assume all lines have the same number of fields
        self.sizehint = len(self.fields_w)
        self.fields_w = None
        return w_result