Ejemplos de PCA.fit en Python

Lenguaje de programación: Python

Namespace/Package Name: pca_lite

Clase / Tipo: PCA

Método / Función: fit

Ejemplos en hotexamples.com: 3

Python PCA.fit - 3 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de pca_lite.PCA.fit extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

fit_transform(2)

fit(2)

PCA(1)

Métodos usados con frecuencia

fit_transform (2)

fit (2)

PCA (1)

Ejemplo n.º 1

Mostrar archivo

class Corr2d(object):
    """ Computed 2d correlation spectra, including synchronous and asynchronus,
    correlation, disrelation and other spectra given a 2d data matrix, index
    and columns.  Index and columns are necessary for plotting, so made them
    a mandatory requirement."""

    # Columns aren't used; should I eliminate
    def __init__(self, spec, refspec=None):
        """Build the correlation object from a 2d spectral data structure.

        Parameters
        ----------
        spec : MetaDataFrame
            2d spectral data.  A deep copy is stored on ``self.spec``.
        refspec : array-like, optional
            Custom reference spectrum used for centering.  It is converted
            with ``np.array`` and must have the same shape as ``spec.index``.
            When omitted the data is centered with ``set_center('mean')``.

        Raises
        ------
        CorrError
            If ``spec`` is not 2d, is not a MetaDataFrame, or ``refspec``
            does not match the shape of the spectral index.
        """
        if spec.ndim != 2:
            raise CorrError('Data must be 2d!')

        if not isinstance(spec, MetaDataFrame):
            # The message must be formatted *before* it is handed to the
            # exception, and the offending object is ``spec``.
            raise CorrError('Corr2d requires skspec data structures '
                            '(Metadataframe, Spectra, etc...), got %s'
                            % type(spec))

        # Make an actual copy of the data rather than keeping a reference
        self.spec = spec.deepcopy()

        # Promote spec attributes for convenience
        self.index = spec.index
        self.columns = spec.columns
        self.specunit = spec.specunit
        self.varunit = spec.varunit

        # Better to store than compute as a property over and over
        self._noda = noda_matrix(self.M)

        # Defaults
        self._scaled = False
        self.alpha = 0.8
        self.beta = 0.0
        self._PCA = None

        # Ref spectrum/dynamic spectrum/centering
        if refspec is not None:
            # Instead of a type check, force conversion to an array; the
            # reference must be an array for the subtraction below to work.
            refspec = np.array(refspec)
            if refspec.shape != self.index.shape:
                raise CorrError('Shape mismatch: spectral data index %s and'
                                ' reference spec shape %s.' %
                                (self.index.shape, refspec.shape))

            self.ref_spectrum = refspec
            self._center = 'Pre-centered'
            self.dyn_spec = self.spec.subtract(self.ref_spectrum, axis=0)

        else:
            self.set_center('mean')


    # Scaling and Centering
    # ---------------------

    @property
    def _scale_string(self):
        """ Current state of scalling, used in __repr__ and plot """
        if self._scaled:
            return '(a=%s, b=%s)' % (self.alpha, self.beta)
        return 'False'


    def scale(self, *args, **kwargs):
        """Scale the synchronous and asynchronous spectra via REF 2
        based on generalized exponential parameters.  Scaling alpha will enhance
        the fine detail of the correlations, but also the noise.  Enhancing beta
        can screen the fine details and enhance the primary correlations.  Alpha
        0.8 and beta 0.0 are suggested as optimal tradeoff between fine correlation
        enhancement and low noise.
        """
        if args:
            if len(args) > 1:
                raise CorrError('Please use keywords (alpha=..., beta=...) to'
                                'avoid ambiguity.') 
                # Make that a custom exception
            if args[0] == True:
                if self._scaled == True:
                    logger.warn("Data already scaled!")
                self._scaled = True
            elif args[0] == False:
                if self._scaled == False:
                    logger.warn("Data already unscaled!")
                self._scaled = False
            else:
                raise CorrError('Argument "%s" not understood; please use True or False.')

        else:
            self._scaled = True

        self.alpha = kwargs.pop('alpha', self.alpha)
        self.beta = kwargs.pop('beta', self.beta)
        if self.alpha > 1 or self.beta > 1:
            logger.warn('Alpha/Beta lose meaning off of range 0-1.')


    @property
    def center(self):
        """Read-only label of the current centering.

        Elsewhere in this class it is set to None, 'mean', 'custom fcn.' or
        'Pre-centered'.
        """
        return self._center
    

    def set_center(self, style, *args, **kwargs):
        """Set the centering and recompute the reference and dynamic spectra.

        Parameters
        ----------
        style : falsy value, 'mean' or callable
            A falsy value disables centering (an all-zero reference is used);
            'mean' uses ``self.spec.mean(axis=1)``; anything else is called as
            ``style(self.spec, *args, **kwargs)`` and its result becomes the
            reference spectrum.

        Raises
        ------
        CorrError
            If building the reference raises any exception, or if the
            reference length differs from ``self.shape[0]``.
        """

        try:

            if not style:
                self._center = None
                # Instead of np.zeros, multiply column 0 by zero to get a
                # spectrum of 0's with the right length
                ref_spectrum = self.spec[0] * 0.0  

            elif style == 'mean':
                self._center = 'mean'
                ref_spectrum = self.spec.mean(axis=1)

            # style is understood as a function, but not inspected
            else:            
                self._center = 'custom fcn.'           
                ref_spectrum = style(self.spec, *args, **kwargs)

        # NOTE: any failure above (including errors raised inside a custom
        # function) is reported as a bad ``style`` value.
        except Exception:

            raise CorrError('Center requires style of "mean", None or a '
                            ' a function, got "%s".' % style)

        self.ref_spectrum = np.array(ref_spectrum)

        if len(self.ref_spectrum) != self.shape[0]:
            raise CorrError('ref. spectrum should be of spectral length (%s)'
                            ' got "%s".' % (self.shape[0], len(self.ref_spectrum)))

        # Set dynamic spectrum by subtracting the reference along axis 0
        self.dyn_spec = self.spec.subtract(self.ref_spectrum, axis=0)


    @property
    def shape(self):
        """Shape of the stored spectral data (``self.spec.shape``)."""
        return self.spec.shape     

    @property
    def M(self):
        """Number of columns of the data, i.e. ``self.shape[1]``."""
        return self.shape[1]

    # Numpy Arrays
    # ------------

    @property
    def sync_noscale(self):
        """ Return unscaled, synchronous spectrum as a numpy array. """
        return np.dot(self.dyn_spec, self._dynconjtranspose) / (self.M - 1.0)  #ORDER OF OPERATIONS DEPENDENT (aka np.dot(t_dyn, dyn) doesn't work)


    @property
    def async_noscale(self):
        """ """
        return np.dot(self.dyn_spec, np.dot(self._noda, self._dynconjtranspose) ) / (self.M-1.0)


    @property
    def coeff_corr(self):
        """ Correlation coefficient (pg 78) """   
        return np.divide(self.sync_noscale, self.joint_var) 


    @property
    def coeff_disr(self):
        """ Disrelation coefficient (pg 79) """
        # Not the same as np.sqrt( 1 - coef_corr**2), only same in magnitude!
        return np.divide(self.async_noscale, self.joint_var)

    @property
    def joint_var(self):
        """ Product of standard devations of dynamic spectrum. 
        s1 * s2 or sqrt(siag(sync*sync)).
        """
        std = self.dyn_spec.std(axis=1) #sigma(lambda)
        return np.outer(std, std)
        #return Spec2d(np.outer(std, std),
                      #corr2d = self,
                      #name='Joint Variance',
                      #iunit='variance')


    # 11/10/14
    # I confirmed that these are equivalent to the book definitions obtained
    # from the diagonal of the synchronous spectrum, i.e.
    # std = sqrt(diag(sync*sync)) and var = diag(sync * sync).
    # std is actually > var because var < 1, so the sqrt makes it larger.

    @property
    def _dynconjtranspose(self):
        """ Dynamic spectrum conjugate transpose; helpful to be cached"""
        return np.conj(self.dyn_spec).transpose()


    # 2D Correlation Spectra
    # ----------------------
    @property
    def sync(self):
        """Synchronous correlation spectrum wrapped in a Spec2d.

        When scaling is on, the unscaled spectrum is multiplied by
        ``joint_var**(-alpha)`` and ``abs(coeff_corr)**beta``.
        """
        values = self.sync_noscale
        if self._scaled:
            # ** is faster than np.power; abs and np.abs are the same
            values = values * self.joint_var**(-1.0 * self.alpha) * \
                abs(self.coeff_corr)**(self.beta)

        return Spec2d.from_corr2d(values,
                                  corr2d=self,
                                  name='Synchronous Correlation',
                                  iunit='synchronicity')

    @property
    def async(self):
        """Asynchronous correlation spectrum wrapped in a Spec2d.

        When scaling is on, the unscaled spectrum is multiplied by
        ``joint_var**(-alpha)`` and ``abs(coeff_disr)**beta``.
        """     
        # NOTE(review): ``async`` is a reserved keyword from Python 3.7 on, so
        # this definition only parses on older interpreters.
        if self._scaled:
            matrixout = self.async_noscale * self.joint_var**(-1.0 * self.alpha) * \
                abs(self.coeff_disr)**(self.beta)
        else:
            matrixout = self.async_noscale

        return Spec2d.from_corr2d(matrixout, 
                      corr2d = self,
                      name='Asynchronous Correlation',
                      iunit='asynchronicity')   

    @property
    def phase(self):
        """Global phase angle (pg 79): arctan of async over sync.

        Built from the ``sync`` and ``async`` properties, so scaled data is
        used whenever scaling is switched on.
        """
        # Fetched by name: ``async`` is a reserved word on newer Pythons.
        asynchronous = getattr(self, 'async')
        phase_map = np.arctan(asynchronous / self.sync)
        phase_map.name = 'Phase Map'
        phase_map.iunit = 'phase angle'
        return phase_map
    
    
    @property
    def modulous(self):
        """Length of the vector whose components are sync and async.

        Computed as ``sqrt(sync**2 + async**2)``.
        """
        synchronous = self.sync
        # Fetched by name: ``async`` is a reserved word on newer Pythons.
        asynchronous = getattr(self, 'async')
        length = np.sqrt(synchronous**2 + asynchronous**2)
        length.name = 'Modulous'
        length.iunit = 'mod'
        return length
        

    @property
    def correlation(self):
        """2D correlation-coefficient spectrum wrapped in a Spec2d."""
        coefficients = self.coeff_corr
        return Spec2d.from_corr2d(coefficients,
                                  corr2d=self,
                                  name='Correlation Coefficient',
                                  iunit='corr. coefficient')

    @property
    def disrelation(self):
        """2D disrelation-coefficient spectrum wrapped in a Spec2d."""
        coefficients = self.coeff_disr
        return Spec2d.from_corr2d(coefficients,
                                  corr2d=self,
                                  name='Disrelation Coefficient',
                                  iunit='disr. coefficient')


    # 2DCodistribution Spectroscopy
    # -----------------------------
    @property
    def char_index(self):
        """ Characteristic index.  In Ref. [2], this is the 
        characteristic time, and is equation 6.

        Returns: Spectrum of length equivalent to spectral index.

        Raises CorrError when the reference spectrum is entirely zero, since
        the result is divided by it.
        """
        m = self.M

        # Checking ``self._center is None`` is not enough: a pre-centered
        # object can also carry an all-zero reference spectrum.
        if np.count_nonzero(self.ref_spectrum) == 0:
            raise CorrError('CoDistribution divides by ref spectrum.  If'
                            ' not centring, the ref spec is 0 and you get infinities!')
        coeff = 1.0 / (m * self.ref_spectrum)

        summation = 0 
        k_matrix = np.empty(m)
        # NOTE(review): every pass dots the dynamic spectrum with a constant
        # vector of k and adds (m+1)/2 inside the loop -- confirm this matches
        # equation 6 of Ref. [2].
        # NOTE(review): without true division (Python 2), (m+1) / 2 is an
        # integer division -- confirm that is intended.
        for k in range(1, m+1): #m+1 to include m in sum
            k_matrix.fill(k) # in place
            # df.mul is same as dotting:
                #http://stackoverflow.com/questions/15753916/dot-products-in-pandas
            summation += self.dyn_spec.dot(k_matrix) + ((m+1) / 2)
        return coeff * summation

    @property
    def char_perturb(self):
        """ Characteristic index.  In Ref. [2], this is the 
        characteristic time, and is equation 6.

        Returns: Spectrum of length equivalent to spectral index.
        """
        tm, t1 = self.columns[-1], self.columns[0]
        Kj = self.char_index
        
        return ((tm-t1) * ((Kj-1) / (self.M -1))) + t1


    # TO	VECTORIZE:
    
    @property
    def async_codist(self):
        """Asynchronous codistribution wrapped in a Spec2d.

        Element ``[i, j]`` equals
        ``(tbar[j] - tbar[i]) / (tm - t1) * joint_var[i, j]`` where ``tbar``
        holds the values of ``char_perturb`` and ``t1``/``tm`` are the first
        and last column labels.
        """
        tm, t1 = self.columns[-1], self.columns[0]

        tbar = self.char_perturb.values
        var = self.joint_var

        # Broadcasting builds the full (row, col) table of tbar[j] - tbar[i]
        # in one step.  The result is deliberately not called ``async``,
        # which is a reserved word from Python 3.7 on.
        coeff = (tbar[np.newaxis, :] - tbar[:, np.newaxis]) / (tm - t1)
        # I believe std[i] std[j] (the joint variance) is the correct weight
        codist = coeff * var

        return Spec2d.from_corr2d(codist,
                                  corr2d=self,
                                  name='Asynchronous Codistribution',
                                  iunit='asynchronicity')
    
    @property
    def sync_codist(self):
        """Synchronous codistribution wrapped in a Spec2d.

        Computed from the asynchronous codistribution as
        ``sqrt(joint_var**2 - async_codist**2)``, element by element.
        """
        var = self.joint_var
        async_cod = self.async_codist.values

        # Vectorised: numpy applies the formula to every element at once.
        codist = np.sqrt(var**2 - async_cod**2)

        return Spec2d.from_corr2d(codist,
                                  corr2d=self,
                                  name='Synchronous Codistribution',
                                  iunit='synchronicity')
    

    def plot(self, **pltkwargs):
        """ Quad plot shows several kinds of correlation plots.

        All keyword arguments are forwarded unchanged to ``corr_multi``,
        whose return value is returned.
        """
        return corr_multi(self, **pltkwargs)
        

    def _pcagate(self, attr):
        """ Raise an error if use calls inaccessible PCA method."""
        if not self._PCA:
            raise CorrError('Please run .pca_fit() method before '
                            'calling %s.%s' % self.__class__.__name__, attr)    

    def pca_fit(self, n_components=None, fit_transform=True):# k=None, kernel=None, extern=False):           
        """Create a PCA object and fit it to the dynamic spectrum.

        Adaptation of Alexis Mignon's pca.py script

        Adapted to fit skspec 5/6/2013.  
        Original credit to Alexis Mignon:
        Module for Principal Component Analysis.

        Author: Alexis Mignon (c)
        Date: 10/01/2012
        e-mail: [email protected]
        (https://code.google.com/p/pypca/source/browse/trunk/PCA.py)

        Parameters
        ----------
        n_components : optional
            Passed straight to the ``PCA`` constructor.
        fit_transform : bool (True)
            If True, return ``PCA.fit_transform(self.dyn_spec)``; otherwise
            call ``PCA.fit(self.dyn_spec)`` and return None.

        Notes
        -----
        The legacy constructor arguments ``k``, ``kernel`` and ``extern`` of
        the original script are not exposed here.

        The pcakernel.py module is more modular.  These class methods
        make it easier to perform PCA on a timespectra, but are less 
        flexible than using the module functions directly.

        The data is passed as-is; the transpose that used to be applied is
        currently disabled.

        Changes to timespectra do not retrigger PCA refresh.  This 
        method should be called each time changes are made to the data.
        """

        # NOW USES DYNSPEC BUT DID NOT TEST BEFORE CHANGING
        # Warn only when the data is NOT mean centered, as the message says
        # (the previous test fired exactly when centering was active).
        if self.center != 'mean':
            logger.warn('Builtin PCA will perform mean-centering on'
                        ' data.  Data is not mean centered yet.')
        self._PCA = PCA(n_components=n_components)                
        if fit_transform:
            return self._PCA.fit_transform(self.dyn_spec)#.transpose())
        else:    
            self._PCA.fit(self.dyn_spec)#.transpose())


    @property
    def PCA(self):
        """ Return the full PCA class object.

        Goes through ``_pcagate`` first, which raises if ``pca_fit()`` has
        not been run.
        """
        self._pcagate('pca')
        return self._PCA 

    @property
    def pca_evals(self):
        """Eigenvalues (``eigen_values_``) of the fitted PCA object.

        Goes through ``_pcagate`` first, which raises if ``pca_fit()`` has
        not been run.
        """
        self._pcagate('eigen values')
        # Index is not self.columns because eigenvalues are still computed with
        # all timepoints, not a subset of the columns        
        return self._PCA.eigen_values_

    @property
    def pca_evecs(self):
        """Eigenvectors (``eigen_vectors_``) of the fitted PCA object.

        Goes through ``_pcagate`` first, which raises if ``pca_fit()`` has
        not been run.
        """
        self._pcagate('eigen vectors')
        return self._PCA.eigen_vectors_

    def load_vec(self, k):
        """ Return loading vector series for k.  If k > number of components
            computed with runpca(), this raises an error rather than 
            recomputing.
        """
        self._pcagate('load_vec')
        if k > len(self.shape[1]):
            raise CorrError('Principle components must be <= number'
                            'of samples %s'% self.shape[1])

        # Decided to put impetus on user to recompute when not using enough principle components
        # rather then trying to figure out logic of all use cases.
        # If k > currently stored eigenvectors, recomputes pca
        if self._PCA._k:
            if k > len(self.pca_evals):   
                logger.warn('Refitting, only %s components were computed'
                            'originally' % self._PCA._k)
                self.pca_fit(n_components=k, fit_transform=False)

        return self._PCA.eigen_vectors_[:,k]


    def __repr__(self):
        """Multi-line summary with aligned columns (like pyparty.multicanvas).

        Lists the class name, shape and memory address followed by the
        centering, scaling and unit information.
        """
        pad = pvconfig.PAD
        default_repr = super(Corr2d, self).__repr__()
        address = default_repr.split()[-1].strip("'").strip('>')

        # Scaled objects show their alpha/beta pair, unscaled ones the flag
        if self._scaled:
            scaled_text = self._scale_string
        else:
            scaled_text = self._scaled

        pieces = [
            '%s (%s X %s) at %s:\n' % (self.__class__.__name__,
                                       self.shape[0], self.shape[1], address),
            '%sCentering -->  %s\n' % (pad, self.center),
            '%sScaled    -->  %s\n' % (pad, scaled_text),
            '%sUnits     -->  [%s X %s]' % (pad,
                                            self.specunit.lower(),
                                            self.varunit.lower()),
        ]
        return ''.join(pieces)

Ejemplo n.º 2

Mostrar archivo

Archivo: corr.py Proyecto: KasparSnashall/pyuvvis

class Corr2d(object):
    """ Computed 2d correlation spectra, including synchronous and asynchronus,
    correlation, disrelation and other spectra given a 2d data matrix, index
    and columns.  Index and columns are necessary for plotting, so made them
    a mandatory requirement."""

    # Columns aren't used; should I eliminate
    def __init__(self, data, index, columns, idx_unit = 'index', col_unit='col',
                 centered=False):
        """  """
        if data.ndim != 2:
            raise CorrError('Data must be 2d Matrix.')

        # Array typecheck?
        self.data = data
        self.index = index   #Relax these maybe and just hide some sideplots...
        self.columns = columns
        self.idx_unit = idx_unit
        self.col_unit = col_unit

        # Defaults
        self._scaled = False
        self._alpha = 0.8
        self._beta = 0.0
        self._PCA = None

        self._centered = False
        if centered:
            if centered == True:
                self._centered = True
            else:
                self._centered = str(centered)  #User can say "max centered"


    def scale(self, *args, **kwargs):
        """Scale the synchronous and asynchronous spectra via REF 2
        based on generalized exponential parameters.  Scaling alpha will enhance
        the fine detail of the correlations, but also the noise.  Enhancing beta
        can screen the fine details and enhance the primary correlations.  Alpha
        0.8 and beta 0.0 are suggested as optimal tradeoff between fine correlation
        enhancement and low noise.
        """
        if args:
            if len(args) > 1:
                raise CorrError('Please use keywords (alpha=..., beta=...) to'
                                'avoid ambiguity.') 
                # Make that a custom exception
            if args[0] == True:
                if self._scaled == True:
                    logger.warn("Data already scaled!")
                self._scaled = True
            elif args[0] == False:
                if self._scaled == False:
                    logger.warn("Data already unscaled!")
                self._scaled = False
            else:
                raise CorrError('Argument "%s" not understood; please use True or False.')

        else:
            self._scaled = True

        self._alpha = kwargs.pop('alpha', self._alpha)
        self._beta = kwargs.pop('beta', self._beta)


    def center(self, style='mean'):  #Just call mean centered?
        """ Mean centers data.  Mean centering is defined columnwise, and while
        this can be down by a call to dataframe.subtract(x.mean(axis=1), axis=0),
        that requries a pandas dataframe method.  Instead, we transpose the data,
        subtract the mean, then transpose again. (confirmed equivalent)"""
        if self._centered:
            logger.warn('Data is already centered.') #Better than a warning I think
                    #In case user sets centered ot 'max' or something in __init__
        else:
            if style == 'mean':
                # Alternate way, pandas dependent
                # self.data = self.data.subtract(self.data.mean(axis=1), axis=0)
                data_trans = self.data.transpose()
                self.data = (data_trans - data_trans.mean()).transpose()
                self._centered = True
            else:
                raise NotImplementedError('mean centering only supported')

    # Used internally; for example in calculation of coeff_corr, need unscaled
    @property
    def synchronous_noscale(self):
        """ """
        m = self.data.shape[1]  # columns        
        return np.dot(self.data, self._dynconjtranspose) / (m - 1.0)  #ORDER OF OPERATIONS DEPENDENT (aka np.dot(t_dyn, dyn) doesn't work)


    @property
    def asynchronous_noscale(self):
        """Unscaled asynchronous spectrum.

        Computed as ``dot(data, dot(noda, conj(data).T)) / (m - 1)`` with
        ``m`` the number of columns.
        """
        n_columns = self.data.shape[1]
        transformed = np.dot(self._noda, self._dynconjtranspose)
        return np.dot(self.data, transformed) / (n_columns-1.0)
        

    @property
    def synchronous(self):
        """ """
        if self._scaled:
            return self.synchronous_noscale * self.data.var(axis=1)**(-1.0 * self._alpha) * \
                   abs(self.coeff_corr)**(self._beta)
                    # ** faster than np.power but abs and np.abs same        
        else:
            return self.synchronous_noscale

    @property
    def asynchronous(self):
        """Asynchronous spectrum, scaled when scaling is switched on.

        The scaled form multiplies the unscaled spectrum by
        ``data.var(axis=1)**(-alpha)`` and ``abs(coeff_disr)**beta``.
        """
        if not self._scaled:
            return self.asynchronous_noscale

        return self.asynchronous_noscale * self.data.var(axis=1)**(-1.0 * self._alpha) * \
            abs(self.coeff_disr)**(self._beta)

    @property
    def coeff_corr(self):
        """ Correlation coefficient (pg 78) """   
        return np.divide(self.synchronous_noscale, self.data.std(axis=1))

    @property
    def coeff_disr(self):
        """Disrelation coefficient (pg 79).

        Unscaled asynchronous spectrum divided by ``data.std(axis=1)``.
        """
        # Not the same as np.sqrt(1 - coeff_corr**2); only equal in magnitude!
        numerator = self.asynchronous_noscale
        spread = self.data.std(axis=1)
        return np.divide(numerator, spread)


    @property
    def phase_angle(self):
        """Global phase angle (pg 79): arctan of asynchronous over synchronous.

        Uses the scaled spectra whenever scaling is switched on.
        """
        ratio = self.asynchronous/self.synchronous
        return np.arctan(ratio)
           

    @property
    def _noda(self):
        """Noda matrix for the data, sized by the number of columns.

        Built with ``noda_matrix(self.shape[1])`` on every access; nothing is
        cached.
        """
        return noda_matrix(self.shape[1])

    @property
    def _dynconjtranspose(self):
        """ Dynamic spectrum conjugate transpose; helpful to be cached"""
        return np.conj(self.data).transpose()


    # Do I want xx, yy in here?
    def plot(self, attr='synchronous', sideplots='mean', annotate=True,
             **plotkwargs):
        """ Visualize synchronous, asynchronous or phase angle spectra.

        Parameters
        ----------

        attr: str attribute (e.g. 'synchronous') or numpy 2d array
            Select which correlation spectra to plot.  Choose from 'sync',
            'async' or 'phase' (or the long names 'synchronous',
            'asynchronous', 'phase_angle').  In addition, can pass a custom
            matrix.  This is mainly for the use case of plotting arithmetic
            operations on sync, async and other matrices.  For example, to
            plot the squared synchronous spectrum, square the matrix and pass
            it back into this plotting function; the index, titles and so
            forth will all be preserved.  A custom matrix that is not already
            a DataFrame is converted to one.

        sideplots: str or bool ('mean')
            If True, 'mean' is used.  'mean', 'min', 'max' plot these
            respective spectra on the side axes; 'empty' leaves the side
            axes blank.  A falsy value draws the contour plot alone.

        annotate: bool (True)
            Adds default title and x/y labels to the plot keywords.
            Setting False is a shortcut to removing them all.

        **plotkwargs: dict
            Forwarded to the plotting helper.  Presumably any valid
            matplotlib contour keyword plus xlabel, ylabel, title and
            options such as contours, cbar, colormap, grid and fill are
            understood there -- see that helper for details.

        Returns
        -------

        tuple (matplotlib.Axes)
            If side plots, returns (ax1, ax2, ax3, ax4)
            If not side plots, returns the contour axes only

        Raises
        ------
        CorrError
            If ``attr`` is an unknown name or cannot be converted to a
            DataFrame, or if ``sideplots`` is not a supported keyword.
        """

        # Resolve the matrix to plot and its title
        if not isinstance(attr, str):
            attr_title = 'Custom'
            # Need a DataFrame or the mean/min/max syntax won't work (sideplots)
            if isinstance(attr, pandas.DataFrame):
                data = attr
            else:
                try:
                    data = pandas.DataFrame(attr)  # Don't set index/columns
                except Exception:
                    raise CorrError('Could not convert data of type %s to '
                                    'DataFrame.  This is used for various '
                                    'subroutines including sideplot mean/max.'
                                    % type(attr))

        elif attr in ['sync', 'synchronous']:
            attr_title = 'Synchronous'
            data = self.synchronous

        elif attr in ['async', 'asynchronous']:
            attr_title = 'Asynchronous'
            data = self.asynchronous

        elif attr in ['phase', 'phase_angle']:
            attr_title = 'Phase Angle'
            data = self.phase_angle

        else:
            raise CorrError('Valid plots include "sync", "async", "phase".'
                            '  Alternatively, pass a custom matrix.')

        linekwds = dict(linewidth=1,
                        linestyle='-',
                        color='black')

        # Only set defaults for labels/title if annotate
        if annotate:
            plotkwargs.setdefault('xlabel', self.idx_unit)
            plotkwargs.setdefault('ylabel', self.idx_unit)

            cols = self.columns
            try:
                plotkwargs.setdefault('title', '%s (%.2f - %.2f %s)' %
                                      (attr_title, cols.min(), cols.max(),
                                       self.col_unit.lower()))

            # Non-numeric columns (e.g. timestamps) cannot use %.2f
            except TypeError:
                if self.col_unit.lower() == 'timestamp':  # Bit of a hack
                    plotkwargs.setdefault('title', '%s (%s - %s)' %
                                          (attr_title, cols.min(), cols.max()))

                # Full string format, not alteration of timestamp values
                else:
                    plotkwargs.setdefault('title', '%s (%s - %s %s)' %
                                          (attr_title, cols.min(), cols.max(),
                                           self.col_unit.lower()))

        xx, yy = np.meshgrid(self.index, self.index)

        if not sideplots:
            return _gen2d(xx, yy, data, **plotkwargs)[0]  # axes, not contours

        if sideplots == True:
            sideplots = 'mean'

        # Validate before any axes are created
        if sideplots not in ('mean', 'max', 'min', 'empty'):
            raise CorrError('sideplots keyword must be "mean", "max", "min",'
                            ' or "empty".')

        # The unit symbol is needed for both the centered and plain labels
        symbol = self.index._unit.symbol
        if self._centered:
            label1 = r'$\bar{A}(%s_1)$' % symbol
            label2 = r'$\bar{A}(%s_2)$' % symbol
        else:
            label1, label2 = r'$A(%s_1)$' % symbol, r'$A(%s_2)$' % symbol

        ax1, ax2, ax3, ax4 = _gencorr2d(xx, yy, data,
                                        label1, label2, **plotkwargs)

        if sideplots != 'empty':
            # 'mean'/'max'/'min' map onto the method of the same name
            side = getattr(data, sideplots)(axis=1)
            ax2.plot(self.index, side, **linekwds)
            ax3.plot(self.index, side, **linekwds)

        # Reorient ax3
        pvutil.invert_ax(ax3)

        if sideplots != 'empty':
            ax2.set_ylabel(sideplots)
            ax2.yaxis.set_label_position('right')

        return (ax1, ax2, ax3, ax4)

    @property
    def shape(self):
        """Shape of the stored data matrix (``self.data.shape``)."""
        return self.data.shape          

    
    def _pcagate(self, attr):
        """ Raise an error if use calls inaccessible PCA method."""
        if not self._PCA:
            raise CorrError('Please run .pca_fit() method before '
                 'calling %s.%s' % self.__class__.__name__, attr)    
        
    def pca_fit(self, n_components=None, fit_transform=True):# k=None, kernel=None, extern=False):           
        """Create a PCA object and fit it to the stored data.

        Adaptation of Alexis Mignon's pca.py script, adapted to fit PyUvVis
        5/6/2013.  Original credit to Alexis Mignon:
        Module for Principal Component Analysis.

        Author: Alexis Mignon (c)
        Date: 10/01/2012
        e-mail: [email protected]
        (https://code.google.com/p/pypca/source/browse/trunk/PCA.py)

        Parameters
        ----------
        n_components : optional
            Passed straight to the ``PCA`` constructor.
        fit_transform : bool (True)
            If True, return ``PCA.fit_transform(self.data)``; otherwise call
            ``PCA.fit(self.data)`` and return None.

        Notes
        -----
        A warning is logged unless the data is flagged as centered with
        exactly ``True``.

        The pcakernel.py module is more modular.  These class methods
        make it easier to perform PCA on a timespectra, but are less
        flexible than using the module functions directly.

        Changes to timespectra do not retrigger PCA refresh.  This
        method should be called each time changes are made to the data.
        """
        already_centered = (self._centered == True)
        if not already_centered:
            logger.warn('Builtin PCA will perform mean-centering on'
                        ' data.  Data is not mean centered yet.')

        self._PCA = PCA(n_components=n_components)
        if not fit_transform:
            self._PCA.fit(self.data)
            return None
        return self._PCA.fit_transform(self.data)
                
                        
    @property
    def PCA(self):
        """ Return the full PCA class object.

        Goes through ``_pcagate`` first, which raises if ``pca_fit()`` has
        not been run.
        """
        self._pcagate('pca')
        return self._PCA 
    
    @property
    def pca_evals(self):
        """Eigenvalues (``eigen_values_``) of the fitted PCA object.

        Goes through ``_pcagate`` first, which raises if ``pca_fit()`` has
        not been run.
        """
        self._pcagate('eigen values')
        # Index is not self.columns because eigenvalues are still computed with
        # all timepoints, not a subset of the columns        
        return self._PCA.eigen_values_
    
    @property
    def pca_evecs(self):
        """Eigenvectors (``eigen_vectors_``) of the fitted PCA object.

        Goes through ``_pcagate`` first, which raises if ``pca_fit()`` has
        not been run.
        """
        self._pcagate('eigen vectors')
        return self._PCA.eigen_vectors_
            
    def load_vec(self, k):
        """ Return loading vector series for k.  If k > number of components
            computed with runpca(), this raises an error rather than 
            recomputing.
        """
        self._pcagate('load_vec')
        if k > len(self.shape[1]):
            raise CorrError('Principle components must be <= number'
                                 'of samples %s'% self.shape[1])

        # Decided to put impetus on user to recompute when not using enough principle components
        # rather then trying to figure out logic of all use cases.
        # If k > currently stored eigenvectors, recomputes pca
        if self._PCA._k:
            if k > len(self.pca_evals):   
                logger.warn('Refitting, only %s components were computed'
                'originally' % self._PCA._k)
                self.pca_fit(n_components=k, fit_transform=False)

        return self._PCA.eigen_vectors_[:,k]
            

    # Alternate constructers
    @classmethod
    def from_spectra(cls, ts, **kwargs):
        kwargs.setdefault('idx_unit',ts.full_specunit), 
        kwargs.setdefault('col_unit',ts.full_varunit),
        return cls(np.array(ts),   
                   ts.index, 
                   ts.columns, 
                   **kwargs)


    def __repr__(self):
        """Multi-line summary with aligned columns (like pyparty.multicanvas).

        Lists the class name, shape and memory address followed by the
        centering, scaling and unit information.
        """
        pad = pvconfig.PAD
        default_repr = super(Corr2d, self).__repr__()
        address = default_repr.split()[-1].strip("'").strip('>')

        header = '%s (%s X %s) at %s:\n' % (self.__class__.__name__,
                                            self.shape[0], self.shape[1], address)
        centering = '%sCentering -->  %s\n' % (pad, self._centered)

        # Scaled objects also report their alpha/beta pair
        if self._scaled:
            scaling = '%sScaled    -->  %s (a=%s, b=%s)\n' % \
                (pad, self._scaled, self._alpha, self._beta)
        else:
            scaling = '%sScaled    -->  %s\n' % (pad, self._scaled)

        units = '%sUnits     -->  [%s X %s]' % (pad, self.idx_unit.lower(),
                                                self.col_unit.lower())
        return header + centering + scaling + units

Ejemplo n.º 3

Mostrar archivo

Archivo: corr.py Proyecto: hugadams/pyuvvis

class Corr2d(object):
    """ Two-dimensional correlation spectra computed from a 2d data matrix.

    Provides the synchronous and asynchronous spectra, the correlation and
    disrelation coefficients and the global phase angle, optional
    exponential scaling of the spectra, a contour-plot helper and a thin
    wrapper around PCA.  ``index`` and ``columns`` are mandatory because
    the plots are labelled with them.
    """

    # TODO: columns are only used for the plot title; consider relaxing.
    def __init__(self,
                 data,
                 index,
                 columns,
                 idx_unit='index',
                 col_unit='col',
                 centered=False):
        """ Store the data matrix and its labels.

        Parameters
        ----------
        data : 2d array-like exposing ``ndim``
            Matrix the correlation spectra are computed from.
        index, columns :
            Row / column labels of ``data``.
        idx_unit, col_unit : str
            Unit names used in plot labels and in ``repr``.
        centered : bool or str
            Whether ``data`` is already centered.  Any truthy value other
            than ``True`` is stored as its string (e.g. "max centered").

        Raises
        ------
        CorrError
            If ``data`` is not two-dimensional.
        """
        if data.ndim != 2:
            raise CorrError('Data must be 2d Matrix.')

        self.data = data
        self.index = index  # Relax these maybe and just hide some sideplots
        self.columns = columns
        self.idx_unit = idx_unit
        self.col_unit = col_unit

        # Defaults
        self._scaled = False
        self._alpha = 0.8
        self._beta = 0.0
        self._PCA = None

        # Falsy -> False, True -> True, anything else -> its string form
        if not centered:
            self._centered = False
        elif centered == True:
            self._centered = True
        else:
            self._centered = str(centered)

    def scale(self, *args, **kwargs):
        """ Toggle scaling of the synchronous and asynchronous spectra and
        optionally update the generalized exponential parameters.

        Scaling alpha enhances the fine detail of the correlations, but
        also the noise.  Enhancing beta screens the fine details and
        enhances the primary correlations.  Alpha 0.8 and beta 0.0 are
        suggested as optimal tradeoff between fine correlation enhancement
        and low noise.

        Parameters
        ----------
        *args :
            At most one positional flag: True enables scaling, False
            disables it.  With no positional argument scaling is enabled.
        alpha, beta : float, keyword only
            New exponents; the stored values are kept when omitted.

        Raises
        ------
        CorrError
            On more than one positional argument or a non-boolean flag.
        """
        if args:
            if len(args) > 1:
                raise CorrError('Please use keywords (alpha=..., beta=...) to '
                                'avoid ambiguity.')

            flag = args[0]
            if flag == True:
                if self._scaled == True:
                    logger.warning("Data already scaled!")
                self._scaled = True
            elif flag == False:
                if self._scaled == False:
                    logger.warning("Data already unscaled!")
                self._scaled = False
            else:
                # The 1-tuple keeps the formatting safe if flag is a tuple
                raise CorrError(
                    'Argument "%s" not understood; please use True or False.'
                    % (flag,))
        else:
            self._scaled = True

        self._alpha = kwargs.pop('alpha', self._alpha)
        self._beta = kwargs.pop('beta', self._beta)

    def center(self, style='mean'):  # Just call mean centered?
        """ Mean-center the data in place: subtract from every row its own
        mean (taken along axis 1).

        This is equivalent to ``dataframe.subtract(x.mean(axis=1), axis=0)``
        but, by transposing, subtracting and transposing back, it does not
        require that pandas-only method and therefore also works on plain
        numpy arrays.

        Parameters
        ----------
        style : str
            Only 'mean' is supported.

        Raises
        ------
        NotImplementedError
            For any other ``style``.
        """
        if self._centered:
            # Also covers a user-supplied label such as 'max' from __init__
            logger.warning('Data is already centered.')
            return

        if style != 'mean':
            raise NotImplementedError('mean centering only supported')

        data_trans = self.data.transpose()
        # axis=0 must be explicit: on an ndarray a bare .mean() collapses
        # to the grand mean instead of one mean per original row.
        self.data = (data_trans - data_trans.mean(axis=0)).transpose()
        self._centered = True

    # Used internally; for example in calculation of coeff_corr, need unscaled
    @property
    def synchronous_noscale(self):
        """ Unscaled synchronous spectrum: data . data^H / (m - 1), with m
        the number of columns. """
        m = self.data.shape[1]  # columns
        # Operand order matters: np.dot(data^H, data) would be a
        # (columns x columns) matrix instead of (rows x rows).
        return np.dot(self.data, self._dynconjtranspose) / (m - 1.0)

    @property
    def asynchronous_noscale(self):
        """ Unscaled asynchronous spectrum: data . N . data^H / (m - 1),
        with N the Noda matrix and m the number of columns. """
        m = self.data.shape[1]  # columns
        return np.dot(self.data, np.dot(self._noda,
                                        self._dynconjtranspose)) / (m - 1.0)

    @property
    def synchronous(self):
        """ Synchronous spectrum.  When scaling is enabled the unscaled
        spectrum is multiplied by var(axis=1)**(-alpha) and by
        abs(coeff_corr)**beta. """
        if not self._scaled:
            return self.synchronous_noscale

        # NOTE(review): var(axis=1) is a per-row vector broadcast along the
        # last axis, not a pairwise sigma(v1)*sigma(v2) term -- confirm.
        # ** faster than np.power but abs and np.abs same
        return self.synchronous_noscale * self.data.var(axis=1)**(-1.0 * self._alpha) * \
               abs(self.coeff_corr)**(self._beta)

    @property
    def asynchronous(self):
        """ Asynchronous spectrum.  When scaling is enabled the unscaled
        spectrum is multiplied by var(axis=1)**(-alpha) and by
        abs(coeff_disr)**beta. """
        if not self._scaled:
            return self.asynchronous_noscale

        return self.asynchronous_noscale * self.data.var(axis=1)**(-1.0 * self._alpha) * \
               abs(self.coeff_disr)**(self._beta)

    @property
    def coeff_corr(self):
        """ Correlation coefficient (pg 78) """
        # NOTE(review): divides by the per-row std only (broadcast along the
        # last axis); a pairwise sigma(v1)*sigma(v2) normalisation may have
        # been intended -- confirm against the reference.
        return np.divide(self.synchronous_noscale, self.data.std(axis=1))

    @property
    def coeff_disr(self):
        """ Disrelation coefficient (pg 79) """
        # Not the same as np.sqrt( 1 - coef_corr**2), only same in magnitude!
        return np.divide(self.asynchronous_noscale, self.data.std(axis=1))

    @property
    def phase_angle(self):
        """ Global phase angle (pg 79): arctan(asynchronous / synchronous).
        Uses the scaled spectra when scaling is enabled."""
        return np.arctan(self.asynchronous / self.synchronous)

    @property
    def _noda(self):
        """ Noda matrix sized by the number of columns in data; rebuilt on
        every access.
        """
        return noda_matrix(self.shape[1])

    @property
    def _dynconjtranspose(self):
        """ Conjugate transpose of the data matrix (recomputed on every
        access, not cached)."""
        return np.conj(self.data).transpose()

    # Do I want xx, yy in here?
    def plot(self,
             attr='synchronous',
             sideplots='mean',
             annotate=True,
             **plotkwargs):
        """ Visualize synchronous, asynchronous or phase angle spectra.

        Parameters
        ----------

        attr: str attribute (e.g. 'synchronous') or 2d array-like
            Select which correlation spectra to plot.  Choose from 'sync',
            'async' or 'phase' (or the full names 'synchronous',
            'asynchronous', 'phase_angle').  In addition, can pass a custom
            matrix.  This is mainly for the use case of plotting arithmetic
            operations on the sync, async and other matrices.  For example,
            to plot the squared synchronous spectrum, square the matrix and
            pass it back into this function; index, titles and so forth are
            preserved.  Anything that is not already a DataFrame is
            converted to one.

        sideplots: str or bool ('mean')
            If True, 'mean' is used.  'mean', 'min', 'max' plot the
            respective row-wise profile on the side axes; 'empty' leaves
            the side axes blank.  A falsy value disables side plots.

        annotate: bool (True)
            Adds default title and x/y labels to the plot.  Setting False
            is a shortcut to removing them all.

        **plotkwargs: dict
            Forwarded untouched to the plotting helper.  The original
            documentation lists contours (int, 20), cbar (str or bool),
            colormap (str, 'jet'), grid (bool) and fill (bool), plus any
            valid matplotlib contour keyword and xlabel, ylabel, title.
            When side plots are disabled, 'kind' defaults to 'contour'.

        Returns
        -------

        tuple (matplotlib.Axes)
            If side plots, returns (ax1, ax2, ax3, ax4)
            If not side plots, returns a single axes object

        Raises
        ------
        CorrError
            On an unknown ``attr`` / ``sideplots`` name, or when a custom
            matrix cannot be converted to a DataFrame.
        """
        # accepted name -> (plot title, attribute holding the spectrum)
        named_spectra = {
            'sync': ('Synchronous', 'synchronous'),
            'synchronous': ('Synchronous', 'synchronous'),
            'async': ('Asynchronous', 'asynchronous'),
            'asynchronous': ('Asynchronous', 'asynchronous'),
            'phase': ('Phase Angle', 'phase_angle'),
            'phase_angle': ('Phase Angle', 'phase_angle'),
        }

        # if user passes matrix instead of a string
        if not isinstance(attr, str):
            attr_title = 'Custom'
            # Need a DataFrame or mean/min/max syntax won't work (sideplots)
            if isinstance(attr, pandas.DataFrame):
                data = attr
            else:
                try:
                    data = pandas.DataFrame(attr)  # Don't set index/columns
                except Exception:
                    raise CorrError('Could not convert data of type %s to '
                                    'DataFrame.  This is used for various '
                                    'subroutines including sideplot mean/max.'
                                    % type(attr))

        elif attr in named_spectra:
            attr_title, attr_name = named_spectra[attr]
            data = getattr(self, attr_name)

        else:
            raise CorrError('Valid plots include "sync", "async", "phase". '
                            'Alternatively, pass a custom matrix.')

        linekwds = dict(linewidth=1, linestyle='-', color='black')

        # Only set defaults for labels/title if annotate
        if annotate:
            plotkwargs.setdefault('xlabel', self.idx_unit)
            plotkwargs.setdefault('ylabel', self.idx_unit)

            cols = self.columns
            try:
                title = '%s (%.2f - %.2f %s)' % (
                    attr_title, cols.min(), cols.max(), self.col_unit.lower())

            # Non-numeric column labels, e.g. timestamps (leave in year?)
            except TypeError:
                if self.col_unit.lower() == 'timestamp':  # Bit of a hack
                    title = '%s (%s - %s)' % (
                        attr_title, cols.min(), cols.max())
                # Full string format, not alteration of timestamp values
                else:
                    title = '%s (%s - %s %s)' % (
                        attr_title, cols.min(), cols.max(),
                        self.col_unit.lower())

            plotkwargs.setdefault('title', title)

        xx, yy = np.meshgrid(self.index, self.index)

        if not sideplots:
            # If no sideplots, can allow for 3d plots
            plotkwargs.setdefault('kind', 'contour')
            # return axes, not contours
            return _gen2d3d(xx, yy, data, **plotkwargs)[0]

        if sideplots == True:
            sideplots = 'mean'

        # Validate before any axes are created
        if sideplots not in ('mean', 'max', 'min', 'empty'):
            raise CorrError('sideplots keyword must be "mean", "max", "min",'
                            ' or "empty".')

        symbol = self.index._unit.symbol
        if self._centered:
            label1 = r'$\bar{A}(%s_1)$' % symbol
            label2 = r'$\bar{A}(%s_2)$' % symbol
        else:
            label1, label2 = r'$A(%s_1)$' % symbol, r'$A(%s_2)$' % symbol

        ax1, ax2, ax3, ax4 = _gencorr2d(xx, yy, data, label1, label2,
                                        **plotkwargs)

        if sideplots != 'empty':
            # 'mean' / 'max' / 'min' are reduction methods of the data
            profile = getattr(data, sideplots)(axis=1)
            ax2.plot(self.index, profile, **linekwds)
            ax3.plot(self.index, profile, **linekwds)

        # Reorient ax3
        pvutil.invert_ax(ax3)

        if sideplots != 'empty':
            ax2.set_ylabel(sideplots)
            ax2.yaxis.set_label_position('right')

        return (ax1, ax2, ax3, ax4)

    @property
    def shape(self):
        """ Shape of the underlying data matrix. """
        return self.data.shape

    def _pcagate(self, attr):
        """ Raise CorrError if the user reaches PCA-dependent attribute
        ``attr`` before ``pca_fit`` has stored a PCA object."""
        if not self._PCA:
            # The tuple is required: '%' binds tighter than the comma
            raise CorrError(
                'Please run .pca_fit() method before '
                'calling %s.%s' % (self.__class__.__name__, attr))

    def pca_fit(self,
                n_components=None,
                fit_transform=True):  # k=None, kernel=None, extern=False):
        """ Create a PCA object, fit it to the current data and store it.

        Adaptation of Alexis Mignon's pca.py script, adapted to fit
        PyUvVis 5/6/2013.  Original credit to Alexis Mignon:
        Module for Principal Component Analysis.

        Author: Alexis Mignon (c)
        Date: 10/01/2012
        (https://code.google.com/p/pypca/source/browse/trunk/PCA.py)

        Parameters
        ----------
        n_components : int or None
            Passed to the PCA constructor.
        fit_transform : bool (True)
            If True, return the result of ``PCA.fit_transform(data)``;
            otherwise only fit and return None.

        Notes
        -----
        The pcakernel.py module is more modular.  These class methods make
        it easier to perform PCA on the stored data, but are less flexible
        than using the module functions directly.

        The data is passed as stored (the transpose is commented out).

        Changes to the data do not retrigger a PCA refresh.  This method
        should be called each time changes are made to the data.
        """
        if self._centered != True:
            logger.warning('Builtin PCA will perform mean-centering on'
                           ' data.  Data is not mean centered yet.')

        self._PCA = PCA(n_components=n_components)
        if fit_transform:
            return self._PCA.fit_transform(self.data)  # .transpose())
        self._PCA.fit(self.data)  # .transpose())

    @property
    def PCA(self):
        """ Return the full PCA class object"""
        self._pcagate('pca')
        return self._PCA

    @property
    def pca_evals(self):
        """ Eigenvalues stored on the fitted PCA object. """
        self._pcagate('eigen values')
        # Index is not self.columns because eigenvalues are still computed
        # with all timepoints, not a subset of the columns
        return self._PCA.eigen_values_

    @property
    def pca_evecs(self):
        """ Eigenvectors stored on the fitted PCA object. """
        self._pcagate('eigen vectors')
        return self._PCA.eigen_vectors_

    def load_vec(self, k):
        """ Return column ``k`` of the fitted PCA eigenvector matrix.

        If the PCA was fitted with a limited number of components and
        ``k`` exceeds the number of stored eigenvalues, the PCA is refit
        with ``n_components=k`` (a warning is logged) before indexing.

        Raises
        ------
        CorrError
            If ``pca_fit`` has not been run, or if ``k`` is larger than
            the number of data columns.
        """
        self._pcagate('load_vec')

        n_samples = self.shape[1]
        if k > n_samples:
            raise CorrError('Principle components must be <= number '
                            'of samples %s' % n_samples)

        # If k > currently stored eigenvectors, recompute the pca
        if self._PCA._k:
            if k > len(self.pca_evals):
                logger.warning('Refitting, only %s components were computed '
                               'originally', self._PCA._k)
                self.pca_fit(n_components=k, fit_transform=False)

        return self._PCA.eigen_vectors_[:, k]

    # Alternate constructors
    @classmethod
    def from_spectra(cls, ts, **kwargs):
        """ Build an instance from a spectra-like object exposing
        ``index``, ``columns``, ``full_specunit`` and ``full_varunit``.
        The unit attributes are only defaults for ``idx_unit`` /
        ``col_unit``; other keywords go to the constructor."""
        kwargs.setdefault('idx_unit', ts.full_specunit)
        kwargs.setdefault('col_unit', ts.full_varunit)
        return cls(np.array(ts), ts.index, ts.columns, **kwargs)

    def __repr__(self):
        """ Aligned columns like pyparty.multicanvas """
        pad = pvconfig.PAD
        # Last token of the default object repr is the memory address
        address = super(Corr2d,
                        self).__repr__().split()[-1].strip("'").strip('>')

        outstring = '%s (%s X %s) at %s:\n' % (
            self.__class__.__name__, self.shape[0], self.shape[1], address)

        # Centering
        outstring += '%sCentering -->  %s\n' % (pad, self._centered)

        # Scaling
        if self._scaled:
            outstring += '%sScaled    -->  %s (a=%s, b=%s)\n' % \
            (pad, self._scaled, self._alpha, self._beta)
        else:
            outstring += '%sScaled    -->  %s\n' % (pad, self._scaled)

        # Units
        outstring += '%sUnits     -->  [%s X %s]' % (
            pad, self.idx_unit.lower(), self.col_unit.lower())

        return outstring