Beispiel #1
0
class MetaDataframe(object):
    ''' Composition class that stores a DataFrame under self._df while making the
    DataFrame's methods/attributes directly accessible on this object.  The object
    therefore "quacks" like a dataframe but is a plain Python object, so it can be
    subclassed easily and keeps its custom attributes across operations.'''

    def __init__(self, *dfargs, **dfkwargs):
        ''' Stores a dataframe under reserved attribute name, self._df.

        *dfargs and **dfkwargs are forwarded unchanged to the DataFrame constructor.'''
        self._df = DataFrame(*dfargs, **dfkwargs)
        # Custom (non-DataFrame) attribute; _deepcopy() carries it onto derived objects.
        self.a = 50

    ### Save /Load methods
    def save(self, outname):
        ''' Pickle self to outname.  cPickle.dump wrapper.

        outname : str path or an already opened, writable file object.  A path is
        opened in binary mode and closed again once the dump is done; a file object
        passed in by the caller is left open for the caller to close.'''
        if isinstance(outname, basestring):
            with open(outname, 'wb') as outfile:
                cPickle.dump(self, outfile)
        else:
            cPickle.dump(self, outname)

    def dumps(self):
        ''' Return self pickled into an in-memory string.  cPickle.dumps wrapper.'''
        return cPickle.dumps(self)

    def deepcopy(self):
        ''' Make a deepcopy of self, including the dataframe.'''
        return copy.deepcopy(self)

    def as_dataframe(self):
        ''' Convenience method to return the raw dataframe, self._df (not a copy).'''
        return self._df

    #----------------------------------------------------------------------
    # Overwrite Dataframe methods and operators

    def __getitem__(self, key):
        ''' Item lookup, delegated to the dataframe.  The result is returned unwrapped.'''
        return self._df.__getitem__(key)

    def __setitem__(self, key, value):
        ''' Item assignment, delegated to the dataframe (mutates self._df in place).'''
        self._df.__setitem__(key, value)

    ### Defined on the class so pickling finds them without going through __getattr__;
    ### hence, this is treated like a normal class.
    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, d):
        self.__dict__.update(d)

    def __getattr__(self, attr, *fcnargs, **fcnkwargs):
        ''' Tells python how to handle all attributes that are not found.  Basic attributes
        are directly referenced to self._df; however, instance methods (like df.corr() ) are
        returned as a wrapper around the private method _dfgetattr().

        Python's attribute lookup only ever passes attr; *fcnargs/**fcnkwargs are kept for
        backward compatibility and, if given, are pre-bound into the returned wrapper.

        Raises AttributeError if neither this object nor the dataframe has attr.'''

        ### Without this guard a missing _df (e.g. object built via __new__) would make the
        ### self._df lookup below re-enter __getattr__ forever.
        if attr == '_df':
            raise AttributeError('%s has no underlying dataframe (_df) set' % self.__class__.__name__)

        ### Return basic attribute
        refout = getattr(self._df, attr)
        if not isinstance(refout, MethodType):
            return refout

        ### Handle instance methods using _dfgetattr().
        ### see http://stackoverflow.com/questions/3434938/python-allowing-methods-not-specifically-defined-to-be-called-ala-getattr
        ### This is a reference to the function (aka a wrapper) not the function's result.
        return functools.partial(self._dfgetattr, attr, *fcnargs, **fcnkwargs)

    def _deepcopy(self, dfnew):
        ''' Return a new object carrying deep copies of all attributes of self except the
        dataframe, which is set to dfnew.

        dfnew : object stored as the new object's _df.  Used primarily for results of
        DataFrame operations (see _dfgetattr and the operator overloads).

        self._df is detached while copying so that copy.deepcopy does not copy the
        dataframe; it is always re-attached afterwards, even if the copy raises.'''
        olddf = self._df
        self._df = None
        try:
            newobj = copy.deepcopy(self)
        finally:
            ### Restore the very same dataframe object (no copy needed).
            self._df = olddf

        newobj._df = dfnew
        return newobj

    def _dfgetattr(self, attr, *fcnargs, **fcnkwargs):
        ''' Called by __getattr__ as a wrapper, this private method is used to ensure that any
        DataFrame method that returns a new DataFrame will actually return a new object of
        this class instead.  It does so by typechecking the return of attr().

        *fcnargs and **fcnkwargs are passed to the dataframe method.

        Note: tried to add an as_new keyword to do this operation in place, but doing self=dfout instead of return dfout
        didn't work.  Could try to add this at the __getattr__ level; however, may not be worth it.'''

        out = getattr(self._df, attr)(*fcnargs, **fcnkwargs)

        ### If operation returns a dataframe, wrap it; otherwise return it untouched.
        if isinstance(out, DataFrame):
            return self._deepcopy(out)
        return out

    def __repr__(self):
        ''' Can be customized, but by default, returns the output of a standard Dataframe.'''
        return self._df.__repr__()

    @property
    def ix(self):
        ''' New object of this class whose _df is the dataframe's .ix indexer.'''
        return self._deepcopy(self._df.ix)

    ### Operator overloading ####
    ### Each operator applies the dataframe's operator and wraps the result via _deepcopy().
    def __add__(self, x):
        return self._deepcopy(self._df.__add__(x))

    def __sub__(self, x):
        return self._deepcopy(self._df.__sub__(x))

    def __mul__(self, x):
        return self._deepcopy(self._df.__mul__(x))

    def __div__(self, x):
        return self._deepcopy(self._df.__div__(x))

    def __truediv__(self, x):
        return self._deepcopy(self._df.__truediv__(x))

    ### From what I can tell, __pos__(), __abs__() builtin to df, just __neg__()
    def __neg__(self):
        return self._deepcopy(self._df.__neg__())

    ### Object comparison operators
    ### These return a wrapped result of the dataframe comparison, not a plain bool.
    def __lt__(self, x):
        return self._deepcopy(self._df.__lt__(x))

    def __le__(self, x):
        return self._deepcopy(self._df.__le__(x))

    def __eq__(self, x):
        return self._deepcopy(self._df.__eq__(x))

    def __ne__(self, x):
        return self._deepcopy(self._df.__ne__(x))

    def __ge__(self, x):
        return self._deepcopy(self._df.__ge__(x))

    def __gt__(self, x):
        return self._deepcopy(self._df.__gt__(x))

    ### Container protocol, delegated straight to the dataframe.
    def __len__(self):
        return self._df.__len__()

    def __nonzero__(self):
        return self._df.__nonzero__()

    def __contains__(self, x):
        return self._df.__contains__(x)

    def __iter__(self):
        return self._df.__iter__()
Beispiel #2
0
class MetaDataFrame(object):
    ''' Base composition for subclassing dataframe.

    A DataFrame is stored under self._df; attribute lookups that fail on this object
    fall through to it (see __getattr__), and results of DataFrame operations are
    re-wrapped in a new object of this class (see _transfer).'''

    def __init__(self, *dfargs, **dfkwargs):
        ''' Stores a dataframe under reserved attribute name, self._df.

        *dfargs and **dfkwargs are forwarded unchanged to the DataFrame constructor.'''
        self._df = DataFrame(*dfargs, **dfkwargs)

    ### Save methods
    def save(self, outname):
        ''' Pickle self to outname.  cPickle.dump wrapper.

        outname : str path or an already opened, writable file object.  A path is
        opened in binary mode and closed again once the dump is done; a file object
        passed in by the caller is left open for the caller to close.'''
        if isinstance(outname, basestring):
            with open(outname, 'wb') as outfile:
                cPickle.dump(self, outfile)
        else:
            cPickle.dump(self, outname)

    def dumps(self):
        ''' Return self pickled into an in-memory string.  cPickle.dumps wrapper.'''
        return cPickle.dumps(self)

    def deepcopy(self):
        ''' Make a deepcopy of self, including the dataframe.'''
        return copy.deepcopy(self)

    def as_dataframe(self):
        ''' Convenience method to return the raw dataframe, self._df (not a copy).'''
        return self._df

    #----------------------------------------------------------------------
    # Overwrite Dataframe methods and operators

    def __getitem__(self, keyslice):
        ''' Item lookup.  If output is an iterable, _transfer is called so the result
        keeps the custom attributes.  Sometimes __getitem__ returns a non-iterable
        (e.g. a float when indexing a series), at which point we just return that.'''

        dfout = self._df.__getitem__(keyslice)

        try:
            iter(dfout)  #Test if iterable without forcing user to have collections package.
        except TypeError:
            return dfout
        ### Reuse the result already fetched rather than indexing a second time.
        return self._transfer(dfout)

    def __setitem__(self, key, value):
        ''' Item assignment, delegated to the dataframe (mutates self._df in place).'''
        self._df.__setitem__(key, value)

    ### Defined on the class so pickling finds them without going through __getattr__;
    ### hence, this is treated like a normal class.
    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, d):
        self.__dict__.update(d)

    def __getattr__(self, attr, *fcnargs, **fcnkwargs):
        ''' Tells python how to handle all attributes that are not found.  Basic attributes
        are directly referenced to self._df; however, instance methods (like df.corr() ) are
        returned as a wrapper around the private method _dfgetattr().

        Python's attribute lookup only ever passes attr; *fcnargs/**fcnkwargs are kept for
        backward compatibility and, if given, are pre-bound into the returned wrapper.

        Raises AttributeError if neither this object nor the dataframe has attr.'''

        ### Without this guard a missing _df (e.g. object built via __new__) would make the
        ### self._df lookup below re-enter __getattr__ forever.
        if attr == '_df':
            raise AttributeError('%s has no underlying dataframe (_df) set' % self.__class__.__name__)

        ### Return basic attribute
        try:
            refout = getattr(self._df, attr)
        except AttributeError:
            raise AttributeError('Could not find attribute "%s" in %s or its underlying DataFrame'%(attr, self.__class__.__name__))

        if not isinstance(refout, MethodType):
            return refout

        ### Handle instance methods using _dfgetattr().
        ### see http://stackoverflow.com/questions/3434938/python-allowing-methods-not-specifically-defined-to-be-called-ala-getattr
        ### This is a reference to the function (aka a wrapper) not the function's result.
        return functools.partial(self._dfgetattr, attr, *fcnargs, **fcnkwargs)

    def __setattr__(self, name, value):
        ''' When user sets an attribute, this tries to intercept any name conflicts with the
        dataframe.  The value is always stored on this object; if name is also listed in the
        module-level _dfattrs it is additionally set on self._df (so e.g. self.columns=50
        raises the same error that self._df.columns=50 would).

        _dfattrs is defined elsewhere in this module; per the original note it is built using
        dir() on a data frame, filtering __x__ type methods.  Not guaranteed to work 100% of
        the time due to possible issues with dir() and inspection in Python.  Best practice
        is for users to avoid name conflicts when possible.'''

        ### NOTE(review): the value is stored on self even when it is forwarded to the
        ### dataframe, so a later read returns this stored value rather than the dataframe's
        ### current one -- confirm this is intended before changing the order.
        super(MetaDataFrame, self).__setattr__(name, value)
        if name in _dfattrs:
            setattr(self._df, name, value)

    def _transfer(self, dfnew):
        ''' Return a new object carrying deep copies of all attributes of self except the
        dataframe, which is set to dfnew.

        dfnew : object stored as the new object's _df.  Used primarily for results of
        DataFrame operations (see __getitem__, _dfgetattr and the operator overloads).

        self._df is detached while copying so that copy.deepcopy does not copy the
        dataframe; it is always re-attached afterwards, even if the copy raises.'''
        olddf = self._df
        self._df = None
        try:
            newobj = copy.deepcopy(self)
        finally:
            ### Restore the very same object (no copy needed, works for series too).
            self._df = olddf

        newobj._df = dfnew
        return newobj

    def _dfgetattr(self, attr, *fcnargs, **fcnkwargs):
        ''' Called by __getattr__ as a wrapper, this private method is used to ensure that any
        DataFrame method that returns a new DataFrame will actually return a new object of
        this class instead.  It does so by typechecking the return of attr().

        *fcnargs and **fcnkwargs are passed to the dataframe method.

        Note: tried to add an as_new keyword to do this operation in place, but doing self=dfout instead of return dfout
        didn't work.  Could try to add this at the __getattr__ level; however, may not be worth it.'''

        out = getattr(self._df, attr)(*fcnargs, **fcnkwargs)

        ### If operation returns a dataframe, wrap it; otherwise return it untouched.
        if isinstance(out, DataFrame):
            return self._transfer(out)
        return out

    def __repr__(self):
        ''' Same representation as the underlying dataframe.'''
        return self._df.__repr__()

    ### Operator overloading ####
    ### Each operator applies the dataframe's operator and wraps the result via _transfer().
    def __add__(self, x):
        return self._transfer(self._df.__add__(x))

    def __sub__(self, x):
        return self._transfer(self._df.__sub__(x))

    def __mul__(self, x):
        return self._transfer(self._df.__mul__(x))

    def __div__(self, x):
        return self._transfer(self._df.__div__(x))

    def __truediv__(self, x):
        return self._transfer(self._df.__truediv__(x))

    def __pow__(self, exp):
        return self._transfer(self._df.__pow__(exp))

    ### From what I can tell, __pos__(), __abs__() builtin to df, just __neg__()
    def __neg__(self):
        return self._transfer(self._df.__neg__())

    ### Object comparison operators
    ### These return a wrapped result of the dataframe comparison, not a plain bool.
    def __lt__(self, x):
        return self._transfer(self._df.__lt__(x))

    def __le__(self, x):
        return self._transfer(self._df.__le__(x))

    def __eq__(self, x):
        return self._transfer(self._df.__eq__(x))

    def __ne__(self, x):
        return self._transfer(self._df.__ne__(x))

    def __ge__(self, x):
        return self._transfer(self._df.__ge__(x))

    def __gt__(self, x):
        return self._transfer(self._df.__gt__(x))

    ### Container protocol, delegated straight to the dataframe.
    def __len__(self):
        return self._df.__len__()

    def __nonzero__(self):
        return self._df.__nonzero__()

    def __contains__(self, x):
        return self._df.__contains__(x)

    def __iter__(self):
        return self._df.__iter__()

    def loc(self):
        ''' Label-based indexing is not supported yet.'''
        raise NotImplementedError

    ## Fancy indexing
    ### Class-level defaults; the properties below replace them with a per-instance indexer
    ### on first access.
    _ix=None
    _iloc=None

    @property
    def ix(self, *args, **kwargs):
        ''' Pandas .ix indexing through a _MetaIndexer (defined elsewhere in this module),
        created on first access and cached in self._ix.  Per the original note, the indexer
        is wrapped so that series returns (eg ix[3]) still maintain attributes.'''
        if self._ix is None:
            try:
                self._ix = _MetaIndexer(self)
            ### New versions of _IXIndexer require "name" attribute.
            except TypeError:
                self._ix = _MetaIndexer(self, '_ix')
        return self._ix

    @property
    def iloc(self, *args, **kwargs):
        ''' Pandas .iloc indexing through an _IlocMeta (defined elsewhere in this module),
        created on first access and cached in self._iloc.'''
        if self._iloc is None:
            try:
                self._iloc = _IlocMeta(self)
            ### New versions of the indexer require "name" attribute.
            except TypeError:
                self._iloc = _IlocMeta(self, '_iloc')
        return self._iloc