Example #1
0
    def raw(self, sample=False):
        """Yield each parsed CSV row as a list of ``Cell`` objects.

        Lines from ``self._sample`` are always parsed first. Unless
        ``sample`` is true, the lines from ``self.lines`` are parsed after
        them.

        :param sample: when true, only the lines in ``self._sample`` are
            parsed.
        :raises messytables.ReadError: for any ``csv.Error`` other than the
            two tolerated cases handled below.
        """
        def rows():
            # On Python 2 the csv module is fed UTF-8 encoded byte strings;
            # on Python 3 the lines are passed through untouched.
            for line in self._sample:
                yield line.encode('utf-8') if PY2 else line
            if not sample:
                for line in self.lines:
                    yield line.encode('utf-8') if PY2 else line

        # Fix the maximum field size to something a little larger.
        # NOTE: csv.field_size_limit() is a module-wide setting of ``csv``.
        csv.field_size_limit(256000)

        try:
            for row in csv.reader(rows(),
                                  dialect=self._dialect, **self._overrides):
                yield [Cell(to_unicode_or_bust(c)) for c in row]
        except csv.Error as err:
            message = unicode_string(err)
            if u'newline inside string' in message and sample:
                # Tolerated while sampling: iteration simply ends here.
                pass
            elif u'line contains NULL byte' in message:
                # Tolerated: iteration ends at the offending line.
                pass
            else:
                # Interpolate the error into the message; passing it as a
                # second argument would leave the '%r' placeholder unfilled.
                raise messytables.ReadError(
                    'Error reading CSV: %r' % (err,))
Example #2
0
 def cast(self, value):
     """Convert *value* to a text string.

     ``None`` and values that are already instances of
     ``self.result_type`` are returned unchanged. Anything else is passed
     to ``unicode_string``; if that raises ``UnicodeEncodeError`` the
     result of ``str(value)`` is returned instead.
     """
     # Nothing to convert: missing value or already the target type.
     if value is None or isinstance(value, self.result_type):
         return value
     try:
         text = unicode_string(value)
     except UnicodeEncodeError:
         text = str(value)
     return text
Example #3
0
 def cast(self, value):
     """Return *value* as a text string.

     ``None`` is passed through, as is any value that is already an
     instance of ``self.result_type``. Other values are converted with
     ``unicode_string``, falling back to ``str(value)`` when that raises
     ``UnicodeEncodeError``.
     """
     passthrough = value is None or isinstance(value, self.result_type)
     if passthrough:
         return value
     try:
         converted = unicode_string(value)
     except UnicodeEncodeError:
         converted = str(value)
     return converted
Example #4
0
def to_unicode_or_bust(obj, encoding='utf-8'):
    """Decode *obj* to text if it is a byte string.

    :param obj: any object; only ``byte_string`` instances are converted.
    :param encoding: encoding passed to ``unicode_string`` when decoding.
    :returns: the decoded text for byte strings, otherwise *obj* unchanged.
    """
    if not isinstance(obj, byte_string):
        return obj
    return unicode_string(obj, encoding)