def from_xlrdcell(xlrd_cell, sheet, col, row):
    """Build an ``XLSCell`` from a raw xlrd cell.

    The messytables type is looked up in ``XLS_TYPES`` by the xlrd cell
    type code, falling back to ``StringType()`` for unknown codes.  Date
    cells have their Excel serial number converted to a Python value.

    :param xlrd_cell: the xlrd cell; its ``value`` and ``ctype`` are read.
    :param sheet: the sheet owning the cell; ``sheet.book.datemode`` is
        used for date conversion.
    :param col: column index of the cell.
    :param row: row index of the cell.
    :returns: an ``XLSCell`` with ``sheet``, ``xlrd_cell`` and ``xlrd_pos``
        attached.
    :raises InvalidDateError: if a date-typed cell holds the value 0.
    """
    # Local, aliased import: keeps the block self-contained and avoids
    # shadowing any module-level ``time`` name.
    from datetime import time as time_of_day

    value = xlrd_cell.value
    cell_type = XLS_TYPES.get(xlrd_cell.ctype, StringType())
    if cell_type == DateType(None):
        if value == 0:
            raise InvalidDateError
        year, month, day, hour, minute, second = xlrd.xldate_as_tuple(
            value, sheet.book.datemode)
        if (year, month, day) == (0, 0, 0):
            # xlrd reports time-only cells (serial value in [0, 1)) with a
            # zero date part, which datetime() rejects; keep the time.
            value = time_of_day(hour, minute, second)
        else:
            value = datetime(year, month, day, hour, minute, second)

    messy_cell = XLSCell(value, type=cell_type)
    messy_cell.sheet = sheet
    messy_cell.xlrd_cell = xlrd_cell
    # necessary for properties, note (row, col) order -- not (x, y)
    messy_cell.xlrd_pos = (row, col)
    return messy_cell
def raw(self, sample=False):
    """Yield every row of this sheet as a list of typed ``Cell`` objects.

    Each cell is tagged from the spreadsheet's own metadata:
    ``DateType(None)`` when the cell reports itself as a date,
    ``IntegerType()`` when its ``data_type`` is ``'n'``, and
    ``StringType()`` otherwise.  With ``sample`` set, no more than
    ``self.window`` rows are produced.
    """
    def to_cell(source_cell):
        # Read the value first, then decide which type to tag it with.
        content = source_cell.value
        if source_cell.is_date():
            cell_type = DateType(None)
        elif source_cell.data_type == 'n':
            cell_type = IntegerType()
        else:
            cell_type = StringType()
        return Cell(content, type=cell_type)

    total_rows = self.sheet.get_highest_row()
    limit = min(self.window, total_rows) if sample else total_rows
    for index in xrange(limit):
        yield [to_cell(source_cell) for source_cell in self.sheet.rows[index]]
def raw(self, sample=False):
    """Iterate over all rows in this sheet, yielding a list of ``Cell``s.

    Cell types come from ``XLS_TYPES`` keyed by the xlrd cell type code,
    with ``StringType()`` as the fallback.  Excel date serial numbers are
    converted to ``datetime`` values; time-only cells become
    ``datetime.time`` values.

    :param sample: when true, stop after at most ``self.window`` rows.
    :raises ValueError: if a date-typed cell holds the value 0; the
        message names the sheet and the 1-based column and row.
    """
    # Local, aliased import: keeps the block self-contained and avoids
    # shadowing any module-level ``time`` name.
    from datetime import time as time_of_day

    num_rows = self.sheet.nrows
    limit = min(self.window, num_rows) if sample else num_rows
    for i in xrange(limit):
        row = []
        for j, cell in enumerate(self.sheet.row(i)):
            value = cell.value
            cell_type = XLS_TYPES.get(cell.ctype, StringType())
            if cell_type == DateType(None):
                if value == 0:
                    raise ValueError('Invalid date at "%s":%d,%d' % (self.sheet.name, j + 1, i + 1))
                year, month, day, hour, minute, second = \
                    xlrd.xldate_as_tuple(value, self.sheet.book.datemode)
                if (year, month, day) == (0, 0, 0):
                    # xlrd reports time-only cells (serial value in
                    # [0, 1)) with a zero date part, which datetime()
                    # rejects; keep the time of day instead.
                    value = time_of_day(hour, minute, second)
                else:
                    value = datetime(year, month, day, hour, minute, second)
            row.append(Cell(value, type=cell_type))
        yield row
from datetime import datetime import xlrd from messytables.core import RowSet, TableSet, Cell from messytables.types import StringType, IntegerType, \ DateType, FloatType XLS_TYPES = { 1: StringType(), # NB: Excel does not distinguish floats from integers so we use floats # We could try actual type detection between floats and ints later # or use the excel format string info - see # https://groups.google.com/forum/?fromgroups=#!topic/ # python-excel/cAQ1ndsCVxk 2: FloatType(), 3: DateType(None), # this is actually boolean but we do not have a boolean type yet 4: IntegerType() } class XLSTableSet(TableSet): """An excel workbook wrapper object. """ def __init__(self, fileobj=None, filename=None, window=None, encoding=None): '''Initilize the tableset.
from lxml import etree from messytables.core import RowSet, TableSet, Cell from messytables.types import (StringType, DecimalType, DateType) ODS_NAMESPACES_TAG_MATCH = re.compile("(<office:document-content[^>]*>)", re.MULTILINE) ODS_TABLE_MATCH = re.compile(".*?(<table:table.*?<\/.*?:table>).*?", re.MULTILINE) ODS_TABLE_NAME = re.compile('.*?table:name=\"(.*?)\".*?') ODS_ROW_MATCH = re.compile(".*?(<table:table-row.*?<\/.*?:table-row>).*?", re.MULTILINE) ODS_TYPES = { 'float': DecimalType(), 'date': DateType(None), } class ODSTableSet(TableSet): """ A wrapper around ODS files. Because they are zipped and the info we want is in the zipped file as content.xml we must ensure that we either have a seekable object (local file) or that we retrieve all of the content from the remote URL. """ def __init__(self, fileobj, window=None): '''Initialize the object. :param fileobj: may be a file path or a file-like object. Note the file-like object *must* be in binary mode and must be seekable (it will
VALUE_TYPE = 'value-type' COLUMN_REPEAT = 'number-columns-repeated' EMPTY_CELL_VALUE = '' ODS_VALUE_TOKEN = { "float": "value", "date": "date-value", "time": "time-value", "boolean": "boolean-value", "percentage": "value", "currency": "value" } ODS_TYPES = { 'float': DecimalType(), 'date': DateType('%Y-%m-%d'), 'boolean': BoolType(), 'percentage': PercentageType(), 'time': TimeType() } class ODSTableSet(TableSet): """ A wrapper around ODS files. Because they are zipped and the info we want is in the zipped file as content.xml we must ensure that we either have a seekable object (local file) or that we retrieve all of the content from the remote URL. """ def __init__(self, fileobj, window=None, **kw): '''Initialize the object.