def __init__(self, *args, **kwargs): self.colnames = kwargs.pop('colnames', ()) """Column names.""" self.name = kwargs.pop('name', 'data_table') """Store the name os the 'DataTable' object.""" self.capacity = kwargs.pop('capacity', 0) """Store the max capacity of rows in the container.""" self.firstrow_header = kwargs.pop('firstrow_header', False) """Identify if the firstrow is a header line.""" # input_converter = kwargs.pop('input_converter', True) # """Disable input conversion to 'tuple' object (more speed).""" if kwargs: raise DataTableError("Unexpected keyword arguments (%r)" % kwargs) self.events = SDict(onAppend=Signal(), onInsert=Signal()) if self.capacity: # Connect events to handlers. self.events.onAppend.append(self._capacity_checker) self.events.onInsert.append(self._capacity_checker) self.list = list() self.extend(args) if args and not self.colnames: if self.firstrow_header: self.colnames = tuple(self.list.pop(0)) else: self.colnames = tuple("C%i" % x for x in range(len(self.list[0])))
class DataTable(collections.MutableSequence): u"""Container class for store data. """ colnames = ColnameDesc('colnames') name = NameDesc('name') capacity = CapacityDesc('capacity') def __init__(self, *args, **kwargs): self.colnames = kwargs.pop('colnames', ()) """Column names.""" self.name = kwargs.pop('name', 'data_table') """Store the name os the 'DataTable' object.""" self.capacity = kwargs.pop('capacity', 0) """Store the max capacity of rows in the container.""" self.firstrow_header = kwargs.pop('firstrow_header', False) """Identify if the firstrow is a header line.""" # input_converter = kwargs.pop('input_converter', True) # """Disable input conversion to 'tuple' object (more speed).""" if kwargs: raise DataTableError("Unexpected keyword arguments (%r)" % kwargs) self.events = SDict(onAppend=Signal(), onInsert=Signal()) if self.capacity: # Connect events to handlers. self.events.onAppend.append(self._capacity_checker) self.events.onInsert.append(self._capacity_checker) self.list = list() self.extend(args) if args and not self.colnames: if self.firstrow_header: self.colnames = tuple(self.list.pop(0)) else: self.colnames = tuple("C%i" % x for x in range(len(self.list[0]))) def __iter__(self): for row in self.list: yield row def __len__(self): return len(self.list) def __add__(self, value): if value is not self: self.extend(value) else: for row in self.list[:]: self.append(row) return self __iadd__ = __add__ def __getitem__(self, item): if isinstance(item, int): return self.list[item] elif isinstance(item, basestring): if item not in self.colnames: raise DataTableColumnError("Column '%s' not found" % item) idx = self.colnames.index(item) # Add header name in the first position l = (item,) + tuple(x[idx] for x in self.list) return l def __delitem__(self, index): def delitem(idx): del self.list[idx] if isinstance(index, slice): [delitem(i) for i in xrange(*index.indices(len(self)))] else: delitem(index) def __setitem__(self, index, row): # TODO(Alejandro): implement case when item is a 'slice' try: self.list[index] = tuple(row) except IndexError: raise DataTableError("Index '%i' not created" % index) def __repr__(self): return "DataTable(%s)" % self.list def __str__(self): return str(self.list) # # built-in event handlers. # def _capacity_checker(self): if self.count >= self.capacity: raise DataTableCapacityError( "Maximun capacity reached, stop ('%i')" % self.capacity) # # Special methods # def append(self, row): """""" self.events.onAppend() self.list.append(tuple(row)) def insert(self, index, row): self.events.onInsert() self.list.insert(index, tuple(row)) @fluent def filter(self, expr): u"""Filter container data.""" data_kernel = self if not is_attribute_access(data_kernel[0], self.colnames): # Necessary for attribute access expr = expr_decorator(expr, self.colnames) return ifilter(expr, data_kernel) #return (x for x in data_kernel if expr(x)) @fluent def select(self, *fields, **kwargs): u"""Select fields in the object.""" if not all(isinstance(x, basestring) for x in fields): raise DataTableColumnError("Use only string types for parameter " "'fields'.") invalid_colnames = set(fields) - set(self.colnames) if invalid_colnames: raise DataTableColumnError("Column '%s' not found" % ', '.join(invalid_colnames)) data_kernel = self where = kwargs.pop('where', lambda x: x) if not is_attribute_access(data_kernel[0], self.colnames): # Necessary for attribute access expr = expr_decorator(where, self.colnames) if len(fields) == 1: field_index = self.colnames.index(fields[0]) getter = lambda row: (row[field_index],) else: getter = operator.itemgetter(*fields2index(fields, self.colnames)) # getter transfor to tuples return imap(getter, ifilter(expr, data_kernel)) @fluent def distinct(self, *fields): """Return new 'datatable' with distinct rows.""" data_kernel = self if fields: data_kernel = self.select(*fields) return f_distinct(data_kernel) @fluent def dup(self, *fields): """Return new 'datatable' with distinct rows.""" data_kernel = self if fields: data_kernel = self.select(*fields) return f_dup(data_kernel) @fluent def add_field(self, name, value='', index=-1): """""" if callable(value): expr = expr_decorator(value, self.colnames) data_kernel = (tuple_insert(row, index, expr(row)) for row in self) else: data_kernel = (tuple_insert(row, index, value) for row in self) if index == -1: index = len(self.colnames) self.colnames = tuple_insert(self.colnames, index, name) return data_kernel def clear(self, init=0, offset=0): u"""Clear object.""" del self[init: init+offset] @property def is_initialized(self): u"""""" return bool(self.count) @property def count(self): return len(self) @property def shape(self): rows = self.count cols = max(len(x) for x in self) return (cols, rows)
def inner(row): sdict = SDict(zip(colnames, row)) return method(sdict)