def createReader(readable, **keywords):
    """
    Create a reader matching the contents of ``readable``.

    The format is sniffed from the leading bytes of ``readable``: the first
    4 bytes are compared with the ODS header and, failing that, the first
    8 bytes with the Excel header. Anything else is treated as delimited
    data (such as CSV) whose line and item delimiters are set to
    ``_parsers.AUTO`` and whose quote and escape character is ``"``.

    ``readable`` must support ``read()`` and ``seek()``; it is rewound to
    position 0 before it is handed to the actual reader. The only keyword
    consulted is ``encoding`` (default: ``DEFAULT_ENCODING``), which is
    passed on to the delimited reader only.

    Iterating the returned reader is intended to yield one row of data at
    a time.
    """
    # TODO: Get rid of circular import.
    import _parsers

    assert readable is not None
    encoding = keywords.get("encoding", DEFAULT_ENCODING)
    assert encoding is not None

    headerBytes = readable.read(4)
    _log.debug(u"header=%r", headerBytes)

    if headerBytes == _ODS_HEADER:
        # Consider ICD to be ODS.
        readable.seek(0)
        return _parsers.odsReader(readable)

    # The Excel signature is longer than the ODS one, so fetch 4 more bytes
    # before rewinding.
    headerBytes += readable.read(4)
    readable.seek(0)
    if _tools.isEqualBytes(headerBytes, _EXCEL_HEADER):
        # Consider ICD to be Excel.
        return _parsers.excelReader(readable)

    # Consider ICD to be CSV.
    csvDialect = _parsers.DelimitedDialect()
    csvDialect.lineDelimiter = _parsers.AUTO
    csvDialect.itemDelimiter = _parsers.AUTO
    csvDialect.quoteChar = "\""
    csvDialect.escapeChar = "\""
    return _parsers.delimitedReader(readable, csvDialect, encoding)
def testReader(self):
    """
    Read two comma separated lines using the default dialect and check
    that exactly 2 rows with 2 items each are delivered.
    """
    defaultDialect = self._createDefaultDialect()
    csvText = _parsers.LF.join(["hugo,was", "here,again"])
    reader = _parsers.delimitedReader(StringIO.StringIO(csvText), defaultDialect)

    rowsRead = 0
    for rowIndex, items in enumerate(reader):
        self.assertEqual(2, len(items))
        rowsRead = rowIndex + 1
    self.assertEqual(2, rowsRead)
def testAutoItemDelimiter(self):
    """
    Read a single semicolon separated line with the item delimiter set to
    `_parsers.AUTO` and check that it is delivered as 1 row of 7 items.
    """
    autoDialect = self._createDefaultDialect()
    autoDialect.itemDelimiter = _parsers.AUTO
    source = StringIO.StringIO("some;items;using;a;semicolon;as;separator")
    reader = _parsers.delimitedReader(source, autoDialect)

    rowsRead = 0
    for rowIndex, items in enumerate(reader):
        self.assertEqual(7, len(items))
        rowsRead = rowIndex + 1
    self.assertEqual(1, rowsRead)
def testAutoItemDelimiter(self):
    """
    Read a single semicolon separated line with the item delimiter set to
    `_parsers.AUTO` and check that it is delivered as 1 row of 7 items.
    """
    autoDialect = self._createDefaultDialect()
    autoDialect.itemDelimiter = _parsers.AUTO
    source = StringIO.StringIO("some;items;using;a;semicolon;as;separator")
    reader = _parsers.delimitedReader(source, autoDialect)

    rowsRead = 0
    for rowIndex, items in enumerate(reader):
        self.assertEqual(7, len(items))
        rowsRead = rowIndex + 1
    self.assertEqual(1, rowsRead)
def _assertRowsEqual(self, expectedRows, readable, dialect=None):
    """
    Parse `readable` with a delimited reader using `dialect` (or the
    default dialect if `dialect` is `None`) and pass the reader together
    with `expectedRows` to `readAndAssertEquals` for comparison.

    `readable` is first run through `possiblyStringIoedReadable`.
    """
    assert expectedRows is not None
    assert readable is not None

    source = self.possiblyStringIoedReadable(readable)
    if dialect is not None:
        dialectToUse = dialect
    else:
        dialectToUse = self._createDefaultDialect()
    self.readAndAssertEquals(
        expectedRows, _parsers.delimitedReader(source, dialectToUse))
def _assertRaisesParserSyntaxError(self, readable, dialect=None):
    """
    Attempt to parse all items of `readable` using `dialect` (or the
    default dialect if `dialect` is `None`), expecting this to raise
    `_parsers.ParserSyntaxError`.

    NOTE: the check that the error actually gets raised is currently
    disabled (see the FIXME below), so this helper also passes when
    `readable` can be parsed without any error.
    """
    assert readable is not None

    source = self.possiblyStringIoedReadable(readable)
    if dialect is not None:
        dialectToUse = dialect
    else:
        dialectToUse = self._createDefaultDialect()

    try:
        # Consume the whole reader; the rows themselves are of no interest.
        for _unusedRow in _parsers.delimitedReader(source, dialectToUse):
            pass
        # FIXME: self.fail(u"readable must raise %s" % _parsers.ParserSyntaxError.__name__)
    except _parsers.ParserSyntaxError:
        # This is the error the helper expects, so there is nothing to do.
        pass