Example #1
0
    def test_repr_should_return_str(self):
        """
        http://docs.python.org/py3k/reference/datamodel.html#object.__repr__
        http://docs.python.org/reference/datamodel.html#object.__repr__
        "...The return value must be a string object."

        (str on py2.x, str (unicode) on py3)

        """
        data = [8, 5, 3, 5]
        index1 = [u"\u03c3", u"\u03c4", u"\u03c5", u"\u03c6"]
        cols = [u"\u03c8"]
        df = DataFrame(data, columns=cols, index=index1)
        self.assertTrue(type(df.__repr__() == str))  # both py2 / 3
Example #2
0
class MetaDataFrame(object):
    """ Base composition class for building DataFrame-like objects.

    A pandas DataFrame is stored on the reserved attribute ``_df``.  Attribute
    access that is not resolved on this object is forwarded to that frame by
    ``__getattr__``; forwarded methods that return a DataFrame are re-wrapped
    by ``_transfer`` so that attributes set on this object are carried over
    to the result."""

    def __init__(self, *dfargs, **dfkwargs):
        """ Stores a dataframe under reserved attribute name, self._df.

        All positional and keyword arguments are passed to DataFrame()."""
        self._df = DataFrame(*dfargs, **dfkwargs)

    ### Save methods
    def save(self, outname):
        """ Pickle self with cPickle.dump.

        outname: a path (str) or an already opened, writable file object.
        A path is opened in binary mode and closed again once the dump is
        finished; a file object passed in by the caller is left open."""
        opened_here = isinstance(outname, basestring)
        handle = open(outname, "wb") if opened_here else outname
        try:
            cPickle.dump(self, handle)
        finally:
            ### Only release what this method acquired.
            if opened_here:
                handle.close()

    def dumps(self):
        """ Return self pickled into an in-memory string (cPickle.dumps)."""
        return cPickle.dumps(self)

    def as_dataframe(self):
        """ Convenience method to return the raw dataframe, self._df
        (the stored object itself, not a copy)."""
        return self._df

    # ----------------------------------------------------------------------
    # Overwrite Dataframe methods and operators

    def __getitem__(self, keyslice):
        """ Item lookup.  If the output is an iterable, it is wrapped with
        _transfer.  A non-iterable result (e.g. a float) is returned as is."""

        dfout = self._df.__getitem__(keyslice)

        try:
            iter(dfout)  # Test if iterable without forcing user to have collections package.
        except TypeError:
            return dfout

        ### Reuse the result from above instead of indexing the frame twice.
        return self._transfer(dfout)

    def __setitem__(self, key, value):
        """ Item assignment, forwarded to the wrapped dataframe."""
        self._df.__setitem__(key, value)

    ### These tell python to ignore __getattr__ when pickling; hence, treat this like a normal class
    def __getstate__(self):
        """ Pickle support: the state is the instance __dict__."""
        return self.__dict__

    def __setstate__(self, d):
        """ Pickle support: restore the state by updating the instance __dict__."""
        self.__dict__.update(d)

    def __getattr__(self, attr, *fcnargs, **fcnkwargs):
        """ Tells python how to handle all attributes that are not found.
        Basic attributes are directly referenced to self._df; however,
        instance methods (like df.corr() ) are handled specially using a
        special private parsing method, _dfgetattr().

        Raises AttributeError if neither this object nor the underlying
        DataFrame provides the attribute.  "_df" and "__deepcopy__" are never
        forwarded (see the comments below)."""

        not_found = 'Could not find attribute "%s" in %s or its underlying DataFrame' % (
            attr,
            self.__class__.__name__,
        )

        ### "_df" only reaches this hook when it has not been set yet; reading
        ### self._df below would call this hook again without end.
        ### "__deepcopy__" must not be forwarded either: copy.deepcopy() looks
        ### it up on the instance, and the forwarded call would go through
        ### _dfgetattr() -> _transfer() -> copy.deepcopy(self), i.e. right
        ### back here.  Refusing it lets copy.deepcopy() use the default
        ### protocol (__getstate__ / __setstate__).
        if attr in ("_df", "__deepcopy__"):
            raise AttributeError(not_found)

        ### Return basic attribute
        try:
            refout = getattr(self._df, attr)
        except AttributeError:
            raise AttributeError(not_found)

        if not isinstance(refout, MethodType):
            return refout

        ### Handle instance methods using _dfgetattr().
        ### see http://stackoverflow.com/questions/3434938/python-allowing-methods-not-specifically-defined-to-be-called-ala-getattr
        ### This is a reference to the function (aka a wrapper) not the function itself
        return functools.partial(self._dfgetattr, attr, *fcnargs, **fcnkwargs)

    def __setattr__(self, name, value):
        """ When user sets an attribute, this tries to intercept any name conflicts.  For example, if user attempts to set
        self.columns=50, this will actually try self._df.columns=50, which throws an error.  Not guaranteed to work 100%
        of the time.  Best practice is for users to avoid name conflicts when possible.

        _dfattrs is a module-level name that is not defined in this class; per the original note it is presumably built
        by calling dir() on a DataFrame and filtering __x__ names -- verify against its definition."""

        ### Regular assignment first; for index/columns this runs the property setters defined below.
        super(MetaDataFrame, self).__setattr__(name, value)
        if name in _dfattrs:
            ### Name is listed in _dfattrs: set it on the wrapped frame as well.
            setattr(self._df, name, value)
        else:
            self.__dict__[name] = value

    def _transfer(self, dfnew):
        """ Copy current attributes into a new object wrapping dfnew.  For methods that
        return a dataframe and need to keep the current attributes.

        Returns a deep copy of self whose _df has been replaced by dfnew.
        """
        newobj = copy.deepcopy(self)
        newobj._df = dfnew

        # THESE ARE NEVER TRANSFERED AT DF LEVEL, JUST CREATED NEW.  TRY
        # df.loc
        # a = df*50
        # a._loc  ---> Will be None
        # newobj._loc = self._loc
        # newobj._iloc = self._iloc
        # newobj._ix = self._ix
        return newobj

    def deepcopy(self):
        """ Make a deepcopy of self, including the dataframe."""
        return copy.deepcopy(self)

    def _dfgetattr(self, attr, *fcnargs, **fcnkwargs):
        """ Called by __getattr__ as a wrapper, this private method is used
        to ensure that any DataFrame method that returns a new DataFrame
        will actually return an object of this class instead.  It does so
        by typechecking the return of attr().

        *fcnargs and **fcnkwargs are passed to the dataframe method.

        Note: tried to add an as_new keyword to do this operation in place,
        but doing self=dfout instead of return dfout didn't work.  Could
        try to add this at the __getattr__ level; however, may not be worth it.
        """

        out = getattr(self._df, attr)(*fcnargs, **fcnkwargs)

        ### If operation returns a dataframe, wrap it so attributes are kept
        if isinstance(out, DataFrame):
            return self._transfer(out)

        ### Otherwise return whatever the method return would be
        return out

    def _repr_html_(self):
        """ Allows ipython notebook to display as default html"""
        return self._df._repr_html_()

    def __repr__(self):
        """ Same text as the repr of the wrapped dataframe."""
        return self._df.__repr__()

    ### Operator overloading ####
    ### Each operator applies the DataFrame operator to self._df and wraps the result with _transfer.
    def __add__(self, x):
        return self._transfer(self._df.__add__(x))

    def __sub__(self, x):
        return self._transfer(self._df.__sub__(x))

    def __mul__(self, x):
        return self._transfer(self._df.__mul__(x))

    def __div__(self, x):
        return self._transfer(self._df.__div__(x))

    def __truediv__(self, x):
        return self._transfer(self._df.__truediv__(x))

    ### From what I can tell, __pos__(), __abs__() builtin to df, just __neg__()
    def __neg__(self):
        return self._transfer(self._df.__neg__())

    ### Object comparison operators
    def __lt__(self, x):
        return self._transfer(self._df.__lt__(x))

    def __le__(self, x):
        return self._transfer(self._df.__le__(x))

    def __eq__(self, x):
        return self._transfer(self._df.__eq__(x))

    def __ne__(self, x):
        return self._transfer(self._df.__ne__(x))

    def __ge__(self, x):
        return self._transfer(self._df.__ge__(x))

    def __gt__(self, x):
        return self._transfer(self._df.__gt__(x))

    ### Container protocol, forwarded to the dataframe without wrapping
    def __len__(self):
        return self._df.__len__()

    def __nonzero__(self):
        return self._df.__nonzero__()

    def __contains__(self, x):
        return self._df.__contains__(x)

    def __iter__(self):
        return self._df.__iter__()

    def __pow__(self, exp):
        return self._transfer(self._df.__pow__(exp))

    @property
    def index(self):
        """ Index of the wrapped dataframe."""
        return self._df.index

    @property
    def columns(self):
        """ Columns of the wrapped dataframe."""
        return self._df.columns

    # To avoid accidentally setting index ie ts.index()
    @index.setter
    def index(self, index):
        self._df.index = index

    @columns.setter
    def columns(self, columns):
        self._df.columns = columns

    ## Fancy indexing
    ## Class-level defaults; each indexer is created on first access and then stored on the instance.
    _ix = None
    _iloc = None
    _loc = None

    @property
    def ix(self, *args, **kwargs):
        """ Pandas Indexing.  Note, this has been modified to ensure that series returns (eg ix[3])
        still maintain attributes.

        The indexer is a _MetaIXIndexer (defined outside this class) that is created on first access."""
        if self._ix is None:
            try:
                self._ix = _MetaIXIndexer(self)
            ### New versions of _IXIndexer require "name" attribute.
            except TypeError:
                self._ix = _MetaIXIndexer(self, "ix")
        return self._ix

    @property
    def iloc(self, *args, **kwargs):
        """ See pandas.Index.iloc; preserves metadata.

        The indexer is a _MetaiLocIndexer (defined outside this class) that is created on first access."""
        if self._iloc is None:
            try:
                self._iloc = _MetaiLocIndexer(self)
            ### New versions of _IXIndexer require "name" attribute.
            except TypeError:
                self._iloc = _MetaiLocIndexer(self, "iloc")
        return self._iloc

    @property
    def loc(self, *args, **kwargs):
        """ See pandas.Index.loc; preserves metadata.

        The indexer is a _MetaLocIndexer (defined outside this class) that is created on first access."""
        if self._loc is None:
            try:
                self._loc = _MetaLocIndexer(self)
            ### New versions of _IXIndexer require "name" attribute.
            except TypeError:
                self._loc = _MetaLocIndexer(self, "loc")
        return self._loc
Example #3
0
class MetaDataframe(object):
    """ Provides composition class that essentially stores a DataFrame; however, all methods/attributes of the dataframe
    are directly accessible by the user.  As such, this object "quacks" like a dataframe, but is merely a Python object.  Thus,
    it can be subclassed easily and also has persistent custom attributes."""

    def __init__(self, *dfargs, **dfkwargs):
        """ Stores a dataframe under reserved attribute name, self._df.

        All positional and keyword arguments are passed to DataFrame()."""
        self._df = DataFrame(*dfargs, **dfkwargs)
        ### NOTE(review): looks like a leftover demo of a custom attribute; kept so that
        ### existing code reading .a keeps working -- confirm before removing.
        self.a = 50

    ### Save /Load methods
    def save(self, outname):
        """ Pickle self with cPickle.dump.

        outname: a path (str) or an already opened, writable file object.
        A path is opened in binary mode and closed again once the dump is
        finished; a file object passed in by the caller is left open."""
        opened_here = isinstance(outname, basestring)
        handle = open(outname, "wb") if opened_here else outname
        try:
            cPickle.dump(self, handle)
        finally:
            ### Only release what this method acquired.
            if opened_here:
                handle.close()

    def dumps(self):
        """ Return self pickled into an in-memory string (cPickle.dumps)."""
        return cPickle.dumps(self)

    def deepcopy(self):
        """ Make a deepcopy of self, including the dataframe."""
        return copy.deepcopy(self)

    def as_dataframe(self):
        """ Convenience method to return the raw dataframe, self._df
        (the stored object itself, not a copy)."""
        return self._df

    # ----------------------------------------------------------------------
    # Overwrite Dataframe methods and operators

    def __getitem__(self, key):
        """ Item lookup, forwarded to the wrapped dataframe; the result is returned unwrapped."""
        return self._df.__getitem__(key)

    def __setitem__(self, key, value):
        """ Item assignment, forwarded to the wrapped dataframe."""
        self._df.__setitem__(key, value)

    ### These tell python to ignore __getattr__ when pickling; hence, treat this like a normal class
    def __getstate__(self):
        """ Pickle support: the state is the instance __dict__."""
        return self.__dict__

    def __setstate__(self, d):
        """ Pickle support: restore the state by updating the instance __dict__."""
        self.__dict__.update(d)

    def __getattr__(self, attr, *fcnargs, **fcnkwargs):
        """ Tells python how to handle all attributes that are not found.  Basic attributes
        are directly referenced to self._df; however, instance methods (like df.corr() ) are
        handled specially using a special private parsing method, _dfgetattr().

        Raises AttributeError when the attribute cannot be resolved.  "_df" and
        "__deepcopy__" are never forwarded (see the comments below)."""

        ### "_df" only reaches this hook when it has not been set yet; reading
        ### self._df below would call this hook again without end.
        ### "__deepcopy__" must not be forwarded either: copy.deepcopy() looks it
        ### up on the instance and would receive the dataframe's hook wrapped by
        ### _dfgetattr() instead of copying this object with the default protocol.
        if attr in ("_df", "__deepcopy__"):
            raise AttributeError(attr)

        ### Return basic attribute
        refout = getattr(self._df, attr)
        if not isinstance(refout, MethodType):
            return refout

        ### Handle instance methods using _dfgetattr().
        ### see http://stackoverflow.com/questions/3434938/python-allowing-methods-not-specifically-defined-to-be-called-ala-getattr
        ### This is a reference to the function (aka a wrapper) not the function itself
        return functools.partial(self._dfgetattr, attr, *fcnargs, **fcnkwargs)

    def _deepcopy(self, dfnew):
        """ Copies all attributes into a new object that wraps dfnew.

        The current dataframe is detached while copy.deepcopy runs, so it is
        neither copied nor consulted by __getattr__ during the copy, and is
        put back afterwards even if the copy raises.

        dfnew is the object stored as _df on the returned copy.  This is used primarily in calls from __getattr__."""
        ### Detach the current df so the copy operation skips it
        olddf = self._df
        self._df = None
        try:
            newobj = copy.deepcopy(self)
        finally:
            ### Always restore the df on self
            self._df = olddf

        ### Apply new df under the reserved name (every other method reads _df)
        newobj._df = dfnew
        return newobj

    def _dfgetattr(self, attr, *fcnargs, **fcnkwargs):
        """ Called by __getattr__ as a wrapper, this private method is used to ensure that any
        DataFrame method that returns a new DataFrame will actually return an object of this class
        instead.  It does so by typechecking the return of attr().

        *fcnargs and **fcnkwargs are passed to the dataframe method.

        Note: tried to add an as_new keyword to do this operation in place, but doing self=dfout instead of return dfout
        didn't work.  Could try to add this at the __getattr__ level; however, may not be worth it."""

        out = getattr(self._df, attr)(*fcnargs, **fcnkwargs)

        ### If operation returns a dataframe, wrap it so attributes are kept
        if isinstance(out, DataFrame):
            return self._deepcopy(out)

        ### Otherwise return whatever the method return would be
        return out

    def __repr__(self):
        """ Can be customized, but by default, returns the output of a standard Dataframe."""
        return self._df.__repr__()

    @property
    def ix(self):
        """ Copy of self whose _df is the wrapped dataframe's .ix object."""
        return self._deepcopy(self._df.ix)

    ### Operator overloading ####
    ### Each operator applies the DataFrame operator to self._df and wraps the result with _deepcopy.
    def __add__(self, x):
        return self._deepcopy(self._df.__add__(x))

    def __sub__(self, x):
        return self._deepcopy(self._df.__sub__(x))

    def __mul__(self, x):
        return self._deepcopy(self._df.__mul__(x))

    def __div__(self, x):
        return self._deepcopy(self._df.__div__(x))

    def __truediv__(self, x):
        return self._deepcopy(self._df.__truediv__(x))

    ### From what I can tell, __pos__(), __abs__() builtin to df, just __neg__()
    def __neg__(self):
        return self._deepcopy(self._df.__neg__())

    ### Object comparison operators
    def __lt__(self, x):
        return self._deepcopy(self._df.__lt__(x))

    def __le__(self, x):
        return self._deepcopy(self._df.__le__(x))

    def __eq__(self, x):
        return self._deepcopy(self._df.__eq__(x))

    def __ne__(self, x):
        return self._deepcopy(self._df.__ne__(x))

    def __ge__(self, x):
        return self._deepcopy(self._df.__ge__(x))

    def __gt__(self, x):
        return self._deepcopy(self._df.__gt__(x))

    ### Container protocol, forwarded to the dataframe without wrapping
    def __len__(self):
        return self._df.__len__()

    def __nonzero__(self):
        return self._df.__nonzero__()

    def __contains__(self, x):
        return self._df.__contains__(x)

    def __iter__(self):
        return self._df.__iter__()