Beispiel #1
0
def createReader(readable, **keywords):
    """
    Build a reader that matches the format sniffed from ``readable``.

    The first four bytes of ``readable`` are compared with ``_ODS_HEADER``.
    If they differ, four more bytes are read and the resulting eight bytes
    are compared with ``_EXCEL_HEADER``. Anything that matches neither header
    is treated as delimited data (such as CSV) with automatically detected
    line and item delimiters and ``"`` as both quote and escape character.

    ``readable`` is rewound to position 0 before it is handed to the actual
    reader. The optional keyword ``encoding`` (default: ``DEFAULT_ENCODING``)
    is only passed on to the delimited reader.

    When iterating the resulting reader, it returns a Python array for each
    row of data.
    """
    # TODO: Get rid of circular import.
    import _parsers

    assert readable is not None
    encoding = keywords.get("encoding", DEFAULT_ENCODING)
    assert encoding is not None

    magic = readable.read(4)
    _log.debug(u"header=%r", magic)

    # NOTE(review): ``createDataFormat`` compares the ODS header using
    # ``_tools.isEqualBytes`` while a plain ``==`` is used here -- confirm
    # whether both should use the same comparison.
    if magic == _ODS_HEADER:
        # The short header already identifies ODS.
        readable.seek(0)
        return _parsers.odsReader(readable)

    # The Excel header is longer, so extend what has been read so far
    # before rewinding for the actual reader.
    magic += readable.read(4)
    readable.seek(0)
    if _tools.isEqualBytes(magic, _EXCEL_HEADER):
        return _parsers.excelReader(readable)

    # Neither header matched: fall back to delimited data and let the
    # parser figure out the delimiters.
    csvDialect = _parsers.DelimitedDialect()
    csvDialect.lineDelimiter = _parsers.AUTO
    csvDialect.itemDelimiter = _parsers.AUTO
    csvDialect.quoteChar = "\""
    csvDialect.escapeChar = "\""
    return _parsers.delimitedReader(readable, csvDialect, encoding)
Beispiel #2
0
def createDataFormat(readable, **keywords):
    """
    Sniff ``readable`` and describe its contents as a data format.

    ``readable`` should be a raw binary input stream as returned by
    ``open(..., 'rb')``. Do not use ``codecs.open(...)`` because it returns
    Unicode strings instead of raw strings.

    Supported formats are delimited data (such as CSV), ODS and Excel. For
    delimited data, the options found by ``delimitedOptions()`` are copied
    to the resulting data format; all ``keywords`` are passed on to that
    function.

    On return, ``readable`` has been rewound to position 0.
    """
    assert readable is not None
    encoding = keywords.get("encoding", DEFAULT_ENCODING)
    assert encoding is not None

    magic = readable.read(4)
    _log.debug(u"header=%r", magic)
    if _tools.isEqualBytes(magic, _ODS_HEADER):
        # The first four bytes already identify ODS.
        formatName = data.FORMAT_ODS
    else:
        # Extend the header by four more bytes for the Excel comparison.
        magic += readable.read(4)
        assert isinstance(magic, str), u"icdHeader=%r but must be a string; use open(..., 'rb') instead of codecs.open()" % magic
        assert isinstance(_EXCEL_HEADER, str), u"_EXCEL_HEADER=%r" % _EXCEL_HEADER
        looksLikeExcel = _tools.isEqualBytes(magic, _EXCEL_HEADER)
        # Anything that is neither ODS nor Excel is considered delimited.
        formatName = data.FORMAT_EXCEL if looksLikeExcel else data.FORMAT_DELIMITED

    result = data.createDataFormat(formatName)
    if result.name == data.FORMAT_DELIMITED:
        # Start over from the beginning so the options are sniffed from
        # the whole stream rather than from behind the header.
        readable.seek(0)
        sniffedOptions = delimitedOptions(readable, **keywords)
        for optionName, optionValue in sniffedOptions.items():
            targetProperty = _tools.decamelized(optionName)
            if optionName == _LINE_DELIMITER:
                # The data format stores the line delimiter by name, so
                # translate the sniffed value using the lookup map.
                optionValue = _LINE_DELIMITER_TO_NAME_MAP[optionValue]
            result.set(targetProperty, optionValue)

    # Leave the stream rewound for whoever reads it next.
    readable.seek(0)
    return result