def validate_text(dialect, attr):
    """Check that ``dialect.<attr>`` is a one-character text string.

    Args:
        dialect: Object carrying the attribute to check.
        attr: Name of the attribute to read from ``dialect``.

    Raises:
        Error: If the value is not a ``text_type`` instance (bytes get a
            dedicated message), or if its length is not exactly 1.
    """
    value = getattr(dialect, attr)

    if isinstance(value, text_type):
        if len(value) == 1:
            return
        raise Error('"{0}" must be a 1-character string'.format(attr))

    # Bytes are reported with the literal word "bytes" instead of the
    # type's ``__name__``.
    if type(value) == bytes:
        raise Error('"{0}" must be string, not bytes'.format(attr))
    raise Error('"{0}" must be string, not {1}'.format(
        attr, type(value).__name__))
def sniff(self, sample, delimiters=None):
    """
    Build a dialect class describing the CSV format of ``sample``.

    The quote-and-delimiter guess is tried first; when it yields no
    delimiter, the delimiter-only guess is used instead.

    Raises Error when neither guess produces a delimiter.
    """
    guess = self._guess_quote_and_delimiter(sample, delimiters)
    quotechar, doublequote, delimiter, skipinitialspace = guess

    if not delimiter:
        delimiter, skipinitialspace = self._guess_delimiter(
            sample, delimiters)
        if not delimiter:
            raise Error("Could not determine delimiter")

    class dialect(Dialect):
        _name = "sniffed"
        lineterminator = '\r\n'
        quoting = QUOTE_MINIMAL
        # escapechar is left undefined on the sniffed dialect

    # The guessed values are attached after the class body is built.
    dialect.delimiter = delimiter
    dialect.doublequote = doublequote
    dialect.skipinitialspace = skipinitialspace
    # _csv.reader won't accept a quotechar of '', so fall back to '"'
    dialect.quotechar = quotechar or '"'

    return dialect
def writerow(self, row):
    """Escape every field of ``row`` and hand it to the wrapped writer.

    Args:
        row: Iterable of fields.

    Returns:
        Whatever ``self.writer.writerow`` returns.

    Raises:
        Error: If ``row`` is not iterable (chained from the TypeError).
    """
    # Probe iterability up front so the failure is reported as an Error.
    try:
        iter(row)
    except TypeError as err:
        raise Error(
            "iterable expected, not %s" % type(row).__name__) from err

    escaped = [_escape(field) for field in row]
    return self.writer.writerow(escaped)
def _parse_eat_crnl(self, c):
    """Process character ``c`` for the parser's eat-CR/NL step.

    Newline characters are ignored, ``'\\0'`` moves the parser to
    ``START_RECORD``, and any other character raises Error.
    """
    if c in ('\n', '\r'):
        # Additional line-ending characters are swallowed.
        return

    if c != '\0':
        raise Error('new-line character seen in unquoted field - do you '
                    'need to open the file in universal-newline mode?')

    self.state = START_RECORD
def writerow(self, row):
    """Format ``row`` as one delimited line and write it to ``self.fileobj``.

    Each field is passed through ``self.strategy.prepare``; the prepared
    fields are joined with the dialect's delimiter and terminated with the
    dialect's lineterminator.

    Args:
        row: Iterable of fields. ``None`` is rejected explicitly.

    Returns:
        The return value of ``self.fileobj.write``, mirroring the
        documented behaviour of ``csv.writer.writerow``.

    Raises:
        Error: If ``row`` is None.
    """
    if row is None:
        raise Error('row must be an iterable')

    row = list(row)
    # ``only`` tells the strategy that the record consists of a single field.
    only = len(row) == 1
    row = [self.strategy.prepare(field, only=only) for field in row]

    line = self.dialect.delimiter.join(row) + self.dialect.lineterminator
    return self.fileobj.write(line)
def validate(cls, dialect):
    """Validate the settings of ``dialect``, raising Error on the first problem.

    The dialect is first passed through ``cls.extend``. Checks run in this
    order: ``quoting`` is an int, ``delimiter`` is set and passes
    ``cls.validate_text``, ``lineterminator`` is set and is text,
    ``quoting`` is one of the known constants, and finally, unless quoting
    is ``QUOTE_NONE``, the quotechar/escapechar requirements.

    Raises:
        Error: Describing the first failed check.
    """
    dialect = cls.extend(dialect)

    if not isinstance(dialect.quoting, int):
        raise Error('"quoting" must be an integer')

    if dialect.delimiter is None:
        raise Error('delimiter must be set')
    cls.validate_text(dialect, 'delimiter')

    if dialect.lineterminator is None:
        raise Error('lineterminator must be set')
    if not isinstance(dialect.lineterminator, text_type):
        raise Error('"lineterminator" must be a string')

    known_quoting = (QUOTE_NONE, QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_ALL)
    if dialect.quoting not in known_quoting:
        raise Error('Invalid quoting specified')

    # With quoting disabled there is nothing further to check.
    if dialect.quoting == QUOTE_NONE:
        return

    if dialect.quotechar is None:
        # A missing quotechar is tolerated only when an escapechar exists.
        if dialect.escapechar is None:
            raise Error('quotechar must be set if quoting enabled')
        return

    cls.validate_text(dialect, 'quotechar')
def __next__(self):
    """Parse and return the fields of the next record.

    Lines are pulled from ``self.input_iter`` and fed one character at a
    time to ``self.parse_process_char`` until the parser state returns to
    ``START_RECORD``.

    Raises:
        StopIteration: When the input is exhausted and no partial record
            is pending.
        Error: On unexpected end of data in strict mode, when the input
            yields a non-text line, or when a line contains a NUL
            character.
    """
    self.parse_reset()

    while True:
        try:
            lineobj = next(self.input_iter)
        except StopIteration:
            # Input exhausted. If a field is partially accumulated, or the
            # parser is still inside a quoted field, either fail (strict)
            # or flush what has been collected as a final record.
            if len(self.field) != 0 or self.state == IN_QUOTED_FIELD:
                if self.dialect.strict:
                    raise Error('unexpected end of data')
                self.parse_save_field()
                if self.fields:
                    break
            # Nothing pending: propagate the StopIteration.
            raise

        if not isinstance(lineobj, text_type):
            typ = type(lineobj)
            # Bytes are named literally as "bytes" in the message.
            typ_name = 'bytes' if typ == bytes else typ.__name__
            err_str = ('iterator should return strings, not {0}'
                       ' (did you open the file in text mode?)')
            raise Error(err_str.format(typ_name))

        self.line_num += 1
        for c in lineobj:
            # '\0' is fed to the parser after each line (below), so it is
            # rejected when it appears in the input itself.
            if c == '\0':
                raise Error('line contains NULL byte')
            self.parse_process_char(c)
        # NOTE(review): '\0' appears to act as an end-of-line marker for
        # the state handlers -- confirm against parse_process_char.
        self.parse_process_char('\0')

        # A record is complete once the parser is back at START_RECORD;
        # otherwise keep consuming lines.
        if self.state == START_RECORD:
            break

    # Hand the collected fields to the caller and clear the reference.
    fields = self.fields
    self.fields = None
    return fields
def prepare(self, raw_field, only=None):
    """Convert ``raw_field`` to its output text, escaping and quoting it.

    Args:
        raw_field: The value to write; ``None`` is rendered as ``''``.
        only: Forwarded to ``self.quoted``.

    Returns:
        The field text, escaped where ``self.escape_re`` matches and
        wrapped in the dialect's quotechar when ``self.quoted`` says so.

    Raises:
        Error: If escaping is required but no escapechar is available.
    """
    text = text_type('' if raw_field is None else raw_field)
    quoted = self.quoted(field=text, raw_field=raw_field, only=only)

    pattern = self.escape_re(quoted=quoted)
    escapechar = self.escapechar(quoted=quoted)

    if pattern.search(text):
        if escapechar == '\\':
            # A backslash must be doubled to stand for itself inside a
            # regex replacement template.
            escapechar = '\\\\'
        if not escapechar:
            raise Error('No escapechar is set')
        template = r'{escapechar}\1'.format(escapechar=escapechar)
        text = pattern.sub(template, text)

    if not quoted:
        return text

    return '{quotechar}{field}{quotechar}'.format(
        quotechar=self.dialect.quotechar, field=text)
def _parse_quote_in_quoted_field(self, c):
    """Handle character ``c`` seen right after a quote inside a quoted field.

    * A second quotechar (when quoting is enabled) is stored as one
      literal quote and parsing continues in ``IN_QUOTED_FIELD``.
    * The delimiter ends the field; the parser moves to ``START_FIELD``.
    * ``'\\n'``, ``'\\r'`` or ``'\\0'`` ends the field and the record;
      the parser moves to ``START_RECORD`` for ``'\\0'`` and to
      ``EAT_CRNL`` otherwise.
    * Any other character is kept as field data in non-strict mode
      (state ``IN_FIELD``) and rejected in strict mode.

    Raises:
        Error: In strict mode, when ``c`` is none of the above.
    """
    if (self.dialect.quoting != QUOTE_NONE and
            c == self.dialect.quotechar):
        # save "" as "
        self.parse_add_char(c)
        self.state = IN_QUOTED_FIELD
    elif c == self.dialect.delimiter:
        self.parse_save_field()
        self.state = START_FIELD
    elif c == '\n' or c == '\r' or c == '\0':
        # End of line = return [fields]
        self.parse_save_field()
        self.state = START_RECORD if c == '\0' else EAT_CRNL
    elif not self.dialect.strict:
        self.parse_add_char(c)
        self.state = IN_FIELD
    else:
        # illegal
        # Both characters are wrapped in single quotes; the previous
        # template dropped the opening and closing quote.
        raise Error("'{delimiter}' expected after '{quotechar}'".format(
            delimiter=self.dialect.delimiter,
            quotechar=self.dialect.quotechar,
        ))
def get_dialect(name):
    """Look up ``name`` in the dialect registry and return its entry.

    Raises:
        Error: If no dialect is registered under ``name``.
    """
    try:
        found = _dialect_registry[name]
    except KeyError:
        message = 'Could not find dialect {0}'.format(name)
        raise Error(message)
    return found
def unregister_dialect(name):
    """Remove the entry registered under ``name`` from the dialect registry.

    Raises:
        Error: If ``name`` is not currently registered.
    """
    try:
        _dialect_registry.pop(name)
    except KeyError:
        message = '"{name}" not a registered dialect'.format(name=name)
        raise Error(message)
def parse_add_char(self, c):
    """Append ``c`` to the field under construction.

    Raises:
        Error: If the field already holds ``field_size_limit()`` or more
            characters.
    """
    if len(self.field) < field_size_limit():
        self.field.append(c)
        return
    raise Error('field size limit exceeded')
def quoted(self, field, only, **kwargs):
    """Report that ``field`` is not to be quoted.

    Args:
        field: The field text.
        only: Truthy when the field is the only one in its record.
        **kwargs: Accepted and ignored.

    Returns:
        Always False.

    Raises:
        Error: If the field is empty and is the record's only field,
            since such a record must be quoted.
    """
    lone_empty_field = field == '' and only
    if not lone_empty_field:
        return False
    raise Error('single empty field record must be quoted')