def createReader(readable, **keywords):
    """
    Create a reader matching the format of the data in ``readable``.

    The format is detected from the leading bytes of ``readable``: if the
    first 4 bytes equal the ODS header, an ODS reader is returned; if the
    first (up to) 8 bytes match the Excel header, an Excel reader is
    returned; everything else is treated as delimited data (such as CSV)
    with automatically detected line and item delimiters.

    ``readable`` must support ``read()`` and ``seek()``; it is rewound to
    position 0 before being handed to the actual reader.

    The optional keyword ``encoding`` (default: ``DEFAULT_ENCODING``) is
    only passed on to the reader for delimited data.

    When iterating the reader, it returns a Python array for each row of
    data.
    """
    # TODO: Get rid of circular import.
    import _parsers

    assert readable is not None
    encoding = keywords.get("encoding", DEFAULT_ENCODING)
    assert encoding is not None

    header = readable.read(4)
    _log.debug(u"header=%r", header)
    if header == _ODS_HEADER:
        # Consider ICD to be ODS.
        readable.seek(0)
        return _parsers.odsReader(readable)

    # Not ODS: extend the header by 4 more bytes for the Excel check, then
    # rewind so whichever reader is chosen starts at the beginning.
    header += readable.read(4)
    readable.seek(0)
    if _tools.isEqualBytes(header, _EXCEL_HEADER):
        # Consider ICD to be Excel.
        return _parsers.excelReader(readable)

    # Consider ICD to be CSV: delimiters are auto-detected, and the double
    # quote acts both as quote and as escape character.
    csvDialect = _parsers.DelimitedDialect()
    csvDialect.lineDelimiter = _parsers.AUTO
    csvDialect.itemDelimiter = _parsers.AUTO
    csvDialect.quoteChar = "\""
    csvDialect.escapeChar = "\""
    return _parsers.delimitedReader(readable, csvDialect, encoding)
def createDataFormat(readable, **keywords):
    """
    Create a data format describing the contents of ``readable``.

    ``readable`` should be a raw binary input stream as returned by
    ``open(..., 'rb')``. Do not use ``codecs.open(...)`` because it returns
    Unicode strings instead of raw strings.

    Supported formats are delimited data (such as CSV), ODS and Excel. The
    format is chosen by comparing the leading bytes of ``readable`` with the
    ODS header (first 4 bytes) and the Excel header (first up to 8 bytes);
    anything else is considered delimited data.

    For delimited data, the options computed by ``delimitedOptions()`` (which
    receives ``keywords`` unchanged) are set as properties of the resulting
    data format. ``readable`` is rewound to position 0 before returning.
    """
    assert readable is not None
    assert keywords.get("encoding", DEFAULT_ENCODING) is not None

    sniffedHeader = readable.read(4)
    _log.debug(u"header=%r", sniffedHeader)
    if _tools.isEqualBytes(sniffedHeader, _ODS_HEADER):
        # Consider ICD to be ODS.
        formatName = data.FORMAT_ODS
    else:
        # Extend the header by 4 more bytes for the Excel comparison.
        sniffedHeader += readable.read(4)
        assert isinstance(sniffedHeader, str), u"icdHeader=%r but must be a string; use open(..., 'rb') instead of codecs.open()" % sniffedHeader
        assert isinstance(_EXCEL_HEADER, str), u"_EXCEL_HEADER=%r" % _EXCEL_HEADER
        looksLikeExcel = _tools.isEqualBytes(sniffedHeader, _EXCEL_HEADER)
        # Consider ICD to be Excel if the header matches, otherwise CSV.
        formatName = data.FORMAT_EXCEL if looksLikeExcel else data.FORMAT_DELIMITED

    result = data.createDataFormat(formatName)
    if result.name == data.FORMAT_DELIMITED:
        # Sniff the delimited options from the start of the stream.
        readable.seek(0)
        sniffedOptions = delimitedOptions(readable, **keywords)
        for optionName, optionValue in sniffedOptions.items():
            propertyName = _tools.decamelized(optionName)
            if optionName == _LINE_DELIMITER:
                # Line delimiters are set using their name from the map
                # rather than the sniffed value itself.
                optionValue = _LINE_DELIMITER_TO_NAME_MAP[optionValue]
            result.set(propertyName, optionValue)
    readable.seek(0)
    return result