def raw(self, sample=False):
    """Yield every parsed CSV row as a list of ``Cell`` objects.

    Lines are taken from ``self._sample`` first and, unless ``sample`` is
    true, from ``self.lines`` afterwards. They are parsed by ``csv.reader``
    using ``self._dialect`` plus the keyword overrides in
    ``self._overrides``.

    :param sample: when true, only the lines held in ``self._sample`` are
        read, and a ``newline inside string`` parse error is tolerated.
    :raises messytables.ReadError: for any ``csv.Error`` that is not one of
        the tolerated cases handled below.
    """
    def prepare(line):
        # Under PY2 each line is encoded to UTF-8 before csv.reader sees it.
        return line.encode('utf-8') if PY2 else line

    def rows():
        for line in self._sample:
            yield prepare(line)
        # ``self.lines`` is only touched once the sample is exhausted.
        if not sample:
            for line in self.lines:
                yield prepare(line)

    # Fix the maximum field size to something a little larger
    csv.field_size_limit(256000)

    try:
        reader = csv.reader(rows(), dialect=self._dialect, **self._overrides)
        for row in reader:
            yield [Cell(to_unicode_or_bust(c)) for c in row]
    except csv.Error as err:
        message = unicode_string(err)
        if sample and u'newline inside string' in message:
            # Tolerated while sampling: iteration simply stops here.
            return
        # Match on the common prefix so that both 'line contains NULL byte'
        # and the shorter 'line contains NUL' wording are tolerated.
        # NOTE(review): the shorter wording is believed to be used by some
        # CPython 3 releases -- confirm against the supported versions.
        if u'line contains NUL' in message:
            return
        # Interpolate the error into the message; previously the format
        # string and the error were passed as two separate arguments, so
        # '%r' was never filled in.
        raise messytables.ReadError('Error reading CSV: %r' % (err,))
def cast(self, value):
    """Convert ``value`` to text, leaving ``None`` and ready values alone.

    ``None`` and instances of ``self.result_type`` are returned unchanged.
    Anything else is passed through ``unicode_string``; if that raises
    ``UnicodeEncodeError`` the result of ``str(value)`` is returned instead.
    """
    # Short-circuit keeps ``self.result_type`` untouched for ``None``.
    needs_no_conversion = value is None or isinstance(value, self.result_type)
    if needs_no_conversion:
        return value

    try:
        converted = unicode_string(value)
    except UnicodeEncodeError:
        converted = str(value)
    return converted
def to_unicode_or_bust(obj, encoding='utf-8'):
    """Return ``obj`` decoded to text when it is a ``byte_string`` instance.

    :param obj: any object; only ``byte_string`` instances are converted.
    :param encoding: encoding handed to ``unicode_string`` for the
        conversion (defaults to ``'utf-8'``).
    :returns: ``unicode_string(obj, encoding)`` for byte strings, otherwise
        ``obj`` itself, unchanged.
    """
    if not isinstance(obj, byte_string):
        return obj
    return unicode_string(obj, encoding)