class MetaDataframe(object):
    ''' Composition class that stores a DataFrame under ``self._df`` and
    forwards attribute access to it.

    Attributes and methods that are not defined on this class are looked up
    on the stored dataframe (see __getattr__), so the object "quacks" like a
    dataframe while remaining a plain Python object.  It can therefore be
    subclassed easily and carries its own persistent custom attributes, which
    are copied onto every new object produced by an operator or a forwarded
    method call (see _deepcopy).'''

    def __init__(self, *dfargs, **dfkwargs):
        ''' Build the wrapped dataframe from *dfargs/**dfkwargs and store it
        under the reserved attribute name self._df.'''
        self._df = DataFrame(*dfargs, **dfkwargs)
        # NOTE(review): looks like a debugging leftover; kept so existing
        # instances/pickles that expose ``a`` behave as before.
        self.a = 50

    ### Save / Load methods
    def save(self, outname):
        ''' Pickle self to outname (cPickle.dump wrapper).

        outname - Either a path (str/unicode) or an already opened file-like
                  object.  A path is opened in 'w' mode, written and closed
                  here; an opened file is written to and left open for the
                  caller to close.'''
        if isinstance(outname, basestring):
            # Context manager guarantees the handle is flushed and closed even
            # if pickling fails (the handle used to be leaked).
            with open(outname, 'w') as outfile:
                cPickle.dump(self, outfile)
        else:
            cPickle.dump(self, outname)

    def dumps(self):
        ''' Return self pickled into an in-memory string (cPickle.dumps).'''
        return cPickle.dumps(self)

    def deepcopy(self):
        ''' Return copy.deepcopy(self), including the dataframe.'''
        return copy.deepcopy(self)

    def as_dataframe(self):
        ''' Convenience method returning the raw stored object, self._df.'''
        return self._df

    #----------------------------------------------------------------------
    # Overwrite Dataframe methods and operators

    def __getitem__(self, key):
        ''' Item lookup, delegated unchanged to self._df.'''
        return self._df.__getitem__(key)

    def __setitem__(self, key, value):
        ''' Item assignment, delegated to self._df.'''
        self._df.__setitem__(key, value)

    ### These tell python to ignore __getattr__ when pickling; hence, treat
    ### this like a normal class
    def __getstate__(self):
        ''' Pickle state is the plain instance dictionary.'''
        return self.__dict__

    def __setstate__(self, d):
        ''' Restore the instance dictionary from pickled state d.'''
        self.__dict__.update(d)

    def __getattr__(self, attr, *fcnargs, **fcnkwargs):
        ''' Handle every attribute that normal lookup did not find.

        Non-method attributes are returned straight from self._df.  Bound
        methods (like df.corr) are returned as a functools.partial around
        _dfgetattr(), so that the eventual call is routed through
        _dfgetattr() and can re-wrap a DataFrame result.

        Python itself only ever passes attr; *fcnargs/**fcnkwargs are bound
        into the partial when this is called explicitly.

        Raises AttributeError if neither this object nor self._df has attr.

        see http://stackoverflow.com/questions/3434938/python-allowing-methods-not-specifically-defined-to-be-called-ala-getattr'''
        # If _df itself is missing (e.g. a bare instance made with __new__),
        # evaluating self._df below would re-enter __getattr__ forever.
        if attr == '_df':
            raise AttributeError("'%s' object has no attribute '_df'"
                                 % self.__class__.__name__)

        refout = getattr(self._df, attr)
        if not isinstance(refout, MethodType):
            return refout

        # This is a reference to the wrapper, not a call of the method itself.
        return functools.partial(self._dfgetattr, attr, *fcnargs, **fcnkwargs)

    def _deepcopy(self, dfnew):
        ''' Return a deep copy of self whose stored dataframe is dfnew.

        Every attribute except _df is copied with copy.deepcopy.  The current
        dataframe is detached for the duration of the copy (it is not copied
        correctly by copy.deepcopy when reached through this object) and is
        put back afterwards, even if the copy raises.

        dfnew - Object stored as the _df of the returned copy.  Used
                primarily for results produced via __getattr__ and by the
                operator overloads.'''
        stashed = self._df   # hold a reference; self keeps its own frame
        self._df = None
        try:
            newobj = copy.deepcopy(self)
        finally:
            self._df = stashed

        # Must be the reserved name _df; assigning to ``df`` left the copy
        # wrapping None.
        newobj._df = dfnew
        return newobj

    def _dfgetattr(self, attr, *fcnargs, **fcnkwargs):
        ''' Call the method named attr on self._df with *fcnargs and
        **fcnkwargs (used by __getattr__ as the call wrapper).

        If the call returns a DataFrame, it is wrapped in a copy of self via
        _deepcopy() so custom attributes survive; any other return value is
        passed through untouched.

        Note: tried to add an as_new keyword to do this operation in place,
        but doing self=dfout instead of return dfout didn't work.  Could try
        to add this at the __getattr__ level; however, may not be worth it.'''
        out = getattr(self._df, attr)(*fcnargs, **fcnkwargs)
        if isinstance(out, DataFrame):
            return self._deepcopy(out)
        return out

    def __repr__(self):
        ''' Can be customized, but by default returns the repr of the stored
        dataframe.'''
        return self._df.__repr__()

    @property
    def ix(self):
        ''' Copy of self (via _deepcopy) wrapping self._df.ix.'''
        return self._deepcopy(self._df.ix)

    ### Operator overloading ####
    ### Each returns a new object built with _deepcopy; self is not modified.

    def __add__(self, x):
        return self._deepcopy(self._df.__add__(x))

    def __sub__(self, x):
        return self._deepcopy(self._df.__sub__(x))

    def __mul__(self, x):
        return self._deepcopy(self._df.__mul__(x))

    def __div__(self, x):
        return self._deepcopy(self._df.__div__(x))

    def __truediv__(self, x):
        return self._deepcopy(self._df.__truediv__(x))

    ### From what I can tell, __pos__(), __abs__() builtin to df, just __neg__()
    def __neg__(self):
        return self._deepcopy(self._df.__neg__())

    ### Object comparison operators
    def __lt__(self, x):
        return self._deepcopy(self._df.__lt__(x))

    def __le__(self, x):
        return self._deepcopy(self._df.__le__(x))

    def __eq__(self, x):
        return self._deepcopy(self._df.__eq__(x))

    def __ne__(self, x):
        return self._deepcopy(self._df.__ne__(x))

    def __ge__(self, x):
        return self._deepcopy(self._df.__ge__(x))

    def __gt__(self, x):
        return self._deepcopy(self._df.__gt__(x))

    ### Container protocol, delegated unchanged to the stored dataframe
    def __len__(self):
        return self._df.__len__()

    def __nonzero__(self):
        return self._df.__nonzero__()

    def __contains__(self, x):
        return self._df.__contains__(x)

    def __iter__(self):
        return self._df.__iter__()
class MetaDataFrame(object):
    ''' Base composition class for "subclassing" a dataframe.

    A DataFrame is stored under the reserved attribute name self._df, and
    attributes not defined on this class are looked up on it (see
    __getattr__).  Results of operators, item lookups and forwarded method
    calls are re-wrapped through _transfer() so that custom attributes set on
    this object are carried over to the new object.'''

    def __init__(self, *dfargs, **dfkwargs):
        ''' Build the wrapped dataframe from *dfargs/**dfkwargs and store it
        under the reserved attribute name self._df.'''
        self._df = DataFrame(*dfargs, **dfkwargs)

    ### Save methods
    def save(self, outname):
        ''' Pickle self to outname (cPickle.dump wrapper).

        outname - Either a path (str/unicode) or an already opened file-like
                  object.  A path is opened in 'w' mode, written and closed
                  here; an opened file is written to and left open for the
                  caller to close.'''
        if isinstance(outname, basestring):
            # Context manager guarantees the handle is flushed and closed even
            # if pickling fails (the handle used to be leaked).
            with open(outname, 'w') as outfile:
                cPickle.dump(self, outfile)
        else:
            cPickle.dump(self, outname)

    def dumps(self):
        ''' Return self pickled into an in-memory string (cPickle.dumps).'''
        return cPickle.dumps(self)

    def deepcopy(self):
        ''' Return copy.deepcopy(self), including the dataframe.'''
        return copy.deepcopy(self)

    def as_dataframe(self):
        ''' Convenience method returning the raw stored object, self._df.'''
        return self._df

    #----------------------------------------------------------------------
    # Overwrite Dataframe methods and operators

    def __getitem__(self, keyslice):
        ''' Item lookup on the stored dataframe.

        If the result is iterable it is wrapped in a copy of self via
        _transfer(); a non-iterable result (e.g. a float from indexing a
        series) is returned as is.'''
        dfout = self._df.__getitem__(keyslice)   # looked up once and reused
        try:
            iter(dfout)  # Test if iterable without requiring collections.
        except TypeError:
            return dfout
        return self._transfer(dfout)

    def __setitem__(self, key, value):
        ''' Item assignment, delegated to self._df.'''
        self._df.__setitem__(key, value)

    ### These tell python to ignore __getattr__ when pickling; hence, treat
    ### this like a normal class
    def __getstate__(self):
        ''' Pickle state is the plain instance dictionary.'''
        return self.__dict__

    def __setstate__(self, d):
        ''' Restore the instance dictionary from pickled state d.'''
        self.__dict__.update(d)

    def __getattr__(self, attr, *fcnargs, **fcnkwargs):
        ''' Handle every attribute that normal lookup did not find.

        Non-method attributes are returned straight from self._df.  Bound
        methods (like df.corr) are returned as a functools.partial around
        _dfgetattr(), so that the eventual call is routed through
        _dfgetattr() and can re-wrap a DataFrame result.

        Python itself only ever passes attr; *fcnargs/**fcnkwargs are bound
        into the partial when this is called explicitly.

        Raises AttributeError if neither this object nor self._df has attr.

        see http://stackoverflow.com/questions/3434938/python-allowing-methods-not-specifically-defined-to-be-called-ala-getattr'''
        # If _df itself is missing (e.g. a bare instance made with __new__),
        # evaluating self._df below would re-enter __getattr__ forever.
        if attr == '_df':
            raise AttributeError("'%s' object has no attribute '_df'"
                                 % self.__class__.__name__)

        try:
            refout = getattr(self._df, attr)
        except AttributeError:
            raise AttributeError('Could not find attribute "%s" in %s or its underlying DataFrame'%(attr, self.__class__.__name__))

        if not isinstance(refout, MethodType):
            return refout

        # This is a reference to the wrapper, not a call of the method itself.
        return functools.partial(self._dfgetattr, attr, *fcnargs, **fcnkwargs)

    def __setattr__(self, name, value):
        ''' Set an attribute, mirroring it onto the stored dataframe when the
        name is listed in the module-level _dfattrs.

        The attribute is always set on this object first.  If name is in
        _dfattrs it is then also set on self._df (so e.g. self.columns=50
        attempts self._df.columns=50, which throws an error); otherwise the
        value is written into self.__dict__.

        Not guaranteed to work 100% of the time; best practice is for users
        to avoid name conflicts when possible.'''
        # NOTE(review): _dfattrs is defined outside this class; presumably it
        # holds dir() of a DataFrame with __x__ names filtered out - confirm.
        # NOTE(review): because the value is stored on self before the
        # dataframe is touched, a conflicting name stays set on self even if
        # the dataframe rejects it - confirm this is intended.
        super(MetaDataFrame, self).__setattr__(name, value)
        if name in _dfattrs:
            setattr(self._df, name, value)
        else:
            self.__dict__[name] = value

    def _transfer(self, dfnew):
        ''' Return a deep copy of self whose stored dataframe is dfnew.

        Every attribute except _df is copied with copy.deepcopy.  The current
        dataframe is detached for the duration of the copy (it is not copied
        correctly by copy.deepcopy when reached through this object) and is
        put back afterwards, even if the copy raises.

        dfnew - Object stored as the _df of the returned copy.  Used
                primarily for results produced via __getattr__, __getitem__
                and the operator overloads.'''
        # Hold a reference rather than calling .copy(): self keeps its own
        # frame, and wrapped objects without a .copy method still work.
        stashed = self._df
        self._df = None
        try:
            newobj = copy.deepcopy(self)
        finally:
            self._df = stashed

        newobj._df = dfnew
        return newobj

    def _dfgetattr(self, attr, *fcnargs, **fcnkwargs):
        ''' Call the method named attr on self._df with *fcnargs and
        **fcnkwargs (used by __getattr__ as the call wrapper).

        If the call returns a DataFrame, it is wrapped in a copy of self via
        _transfer() so custom attributes survive; any other return value is
        passed through untouched.

        Note: tried to add an as_new keyword to do this operation in place,
        but doing self=dfout instead of return dfout didn't work.  Could try
        to add this at the __getattr__ level; however, may not be worth it.'''
        out = getattr(self._df, attr)(*fcnargs, **fcnkwargs)
        if isinstance(out, DataFrame):
            return self._transfer(out)
        return out

    def __repr__(self):
        ''' Repr of the stored dataframe.'''
        return self._df.__repr__()

    ### Operator overloading ####
    ### Each returns a new object built with _transfer; self is not modified.

    def __add__(self, x):
        return self._transfer(self._df.__add__(x))

    def __sub__(self, x):
        return self._transfer(self._df.__sub__(x))

    def __mul__(self, x):
        return self._transfer(self._df.__mul__(x))

    def __div__(self, x):
        return self._transfer(self._df.__div__(x))

    def __truediv__(self, x):
        return self._transfer(self._df.__truediv__(x))

    ### From what I can tell, __pos__(), __abs__() builtin to df, just __neg__()
    def __neg__(self):
        return self._transfer(self._df.__neg__())

    ### Object comparison operators
    def __lt__(self, x):
        return self._transfer(self._df.__lt__(x))

    def __le__(self, x):
        return self._transfer(self._df.__le__(x))

    def __eq__(self, x):
        return self._transfer(self._df.__eq__(x))

    def __ne__(self, x):
        return self._transfer(self._df.__ne__(x))

    def __ge__(self, x):
        return self._transfer(self._df.__ge__(x))

    def __gt__(self, x):
        return self._transfer(self._df.__gt__(x))

    ### Container protocol, delegated unchanged to the stored dataframe
    def __len__(self):
        return self._df.__len__()

    def __nonzero__(self):
        return self._df.__nonzero__()

    def __contains__(self, x):
        return self._df.__contains__(x)

    def __iter__(self):
        return self._df.__iter__()

    def __pow__(self, exp):
        return self._transfer(self._df.__pow__(exp))

    # A plain ``def iloc`` raising NotImplementedError used to sit here; it
    # was dead code because the ``iloc`` property below rebinds the name.
    def loc(self):
        ''' Not implemented.'''
        raise NotImplementedError

    ## Fancy indexing
    # Class-level defaults; the properties below cache the indexer on first
    # access.
    _ix = None
    _iloc = None

    @property
    def ix(self, *args, **kwargs):
        ''' Pandas-style ix indexing through a _MetaIndexer built around
        self, created on first access and cached in self._ix.

        Per the original author, the indexer is wrapped so that series
        returns (eg ix[3]) still maintain attributes.  *args/**kwargs are
        never supplied through property access and are unused.'''
        if self._ix is None:
            try:
                self._ix = _MetaIndexer(self)
            except TypeError:
                # New versions of _IXIndexer require "name" attribute.
                self._ix = _MetaIndexer(self, '_ix')
        return self._ix

    @property
    def iloc(self, *args, **kwargs):
        ''' Pandas-style iloc indexing through an _IlocMeta built around
        self, created on first access and cached in self._iloc.

        *args/**kwargs are never supplied through property access and are
        unused.'''
        if self._iloc is None:
            try:
                self._iloc = _IlocMeta(self)
            except TypeError:
                # Newer indexer constructors require a "name" argument.
                self._iloc = _IlocMeta(self, '_iloc')
        return self._iloc