class Corr2d(object):
    """2D correlation analysis of a two-dimensional spectral data set.

    The constructor stores a deep copy of the input spectra, a reference
    spectrum and the resulting dynamic spectrum (``spec`` minus the reference
    spectrum, subtracted along axis 0).  Synchronous/asynchronous spectra,
    correlation/disrelation coefficients, phase, modulus and codistribution
    spectra are all derived from the dynamic spectrum on access.

    Index and columns are promoted from the input data because the plotting
    and codistribution code reads them.
    """

    def __init__(self, spec, refspec=None):
        """Store a copy of ``spec`` and build the dynamic spectrum.

        Parameters
        ----------
        spec : MetaDataFrame
            Two-dimensional spectral data; rows are indexed by ``spec.index``
            and columns by ``spec.columns``.
        refspec : array-like, optional
            Custom reference spectrum with the same shape as ``spec.index``.
            When given, the data is treated as 'Pre-centered' around it;
            otherwise the data is mean centered via ``set_center('mean')``.

        Raises
        ------
        CorrError
            If ``spec`` is not 2D, is not a MetaDataFrame, or ``refspec`` has
            the wrong shape.
        """
        if spec.ndim != 2:
            raise CorrError('Data must be 2d!')

        if not isinstance(spec, MetaDataFrame):
            # The original applied "%" to the exception object and referenced
            # an undefined name; format the message itself instead.
            raise CorrError('Corr2d requires skspec data structures '
                            '(Metadataframe, Spectra, etc...) got %s'
                            % type(spec))

        # Work on an actual copy of the data, not a reference to the caller's.
        self.spec = spec.deepcopy()

        # Promote spec attributes for convenience
        self.index = spec.index
        self.columns = spec.columns
        self.specunit = spec.specunit
        self.varunit = spec.varunit

        # Stored once rather than recomputed on every property access
        self._noda = noda_matrix(self.M)

        # Defaults
        self._scaled = False
        self.alpha = 0.8
        self.beta = 0.0
        self._PCA = None

        if refspec is None:
            self.set_center('mean')
            return

        # Force-convert instead of type checking; the reference spectrum must
        # be an array for the subtraction below to work as defined here.
        refspec = np.array(refspec)
        if refspec.shape != self.index.shape:
            raise CorrError('Shape mismatch: spectral data index %s and'
                            ' reference spec shape %s.'
                            % (self.index.shape, refspec.shape))

        self.ref_spectrum = refspec
        self._center = 'Pre-centered'
        self.dyn_spec = self.spec.subtract(self.ref_spectrum, axis=0)

    # Scaling and Centering
    # ---------------------
    @property
    def _scale_string(self):
        """Current state of scaling, used in __repr__ and plots."""
        if self._scaled:
            return '(a=%s, b=%s)' % (self.alpha, self.beta)
        return 'False'

    def scale(self, *args, **kwargs):
        """Turn scaling of the sync/async spectra on or off and set exponents.

        Parameters
        ----------
        *args
            At most one positional flag: ``True`` enables scaling, ``False``
            disables it.  With no positional argument scaling is enabled.
        alpha, beta : float, optional (keywords)
            Exponents used by ``sync`` and ``async``; current values are kept
            when omitted.  A warning is logged if either exceeds 1.

        Raises
        ------
        CorrError
            If more than one positional argument is passed, or the flag is
            neither True nor False.
        """
        if args:
            if len(args) > 1:
                raise CorrError('Please use keywords (alpha=..., beta=...) to'
                                ' avoid ambiguity.')
            flag = args[0]
            # Equality (not identity) is kept so 1/0 continue to be accepted.
            if flag == True:  # noqa: E712
                if self._scaled == True:  # noqa: E712
                    logger.warning("Data already scaled!")
                self._scaled = True
            elif flag == False:  # noqa: E712
                if self._scaled == False:  # noqa: E712
                    logger.warning("Data already unscaled!")
                self._scaled = False
            else:
                raise CorrError('Argument "%s" not understood; please use '
                                'True or False.' % (flag,))
        else:
            self._scaled = True

        self.alpha = kwargs.pop('alpha', self.alpha)
        self.beta = kwargs.pop('beta', self.beta)

        if self.alpha > 1 or self.beta > 1:
            logger.warning('Alpha/Beta lose meaning off of range 0-1.')

    @property
    def center(self):
        """Label of the current centering (None, 'mean', 'custom fcn.' or
        'Pre-centered')."""
        return self._center

    def set_center(self, style, *args, **kwargs):
        """Set the reference spectrum and recompute the dynamic spectrum.

        Parameters
        ----------
        style : None/False, 'mean', or callable
            Falsy: no centering (reference spectrum of zeros).
            'mean': reference is ``spec.mean(axis=1)``.
            Anything else is called as ``style(spec, *args, **kwargs)`` and
            must return the reference spectrum.

        Raises
        ------
        CorrError
            If the custom function fails / ``style`` is not callable, or the
            reference spectrum does not have one value per row of ``spec``.
        """
        if not style:
            label = None
            # Explicit zeros; indexing spec[0] only works when a column is
            # literally labelled 0.
            ref_spectrum = np.zeros(self.shape[0])
        elif style == 'mean':
            label = 'mean'
            ref_spectrum = self.spec.mean(axis=1)
        else:
            # style is understood as a function, but not inspected
            label = 'custom fcn.'
            try:
                ref_spectrum = style(self.spec, *args, **kwargs)
            except Exception:
                raise CorrError('Center requires style of "mean", None or '
                                'a function, got "%s".' % (style,))

        ref_spectrum = np.array(ref_spectrum)
        if len(ref_spectrum) != self.shape[0]:
            raise CorrError('ref. spectrum should be of spectral length (%s)'
                            ' got "%s".' % (self.shape[0], len(ref_spectrum)))

        # Only commit state once the new reference spectrum is validated.
        self._center = label
        self.ref_spectrum = ref_spectrum
        self.dyn_spec = self.spec.subtract(self.ref_spectrum, axis=0)

    @property
    def shape(self):
        """Shape of the stored spectra."""
        return self.spec.shape

    @property
    def M(self):
        """Number of columns of the stored spectra."""
        return self.shape[1]

    # Numpy Arrays
    # ------------
    @property
    def sync_noscale(self):
        """Unscaled synchronous spectrum: dyn . dyn^H / (M - 1)."""
        # Order of operands matters: np.dot(dyn^H, dyn) is a different matrix.
        return np.dot(self.dyn_spec, self._dynconjtranspose) / (self.M - 1.0)

    @property
    def async_noscale(self):
        """Unscaled asynchronous spectrum: dyn . (N . dyn^H) / (M - 1), with
        N the matrix returned by ``noda_matrix(M)``."""
        rotated = np.dot(self._noda, self._dynconjtranspose)
        return np.dot(self.dyn_spec, rotated) / (self.M - 1.0)

    @property
    def coeff_corr(self):
        """Correlation coefficient (pg 78): sync_noscale / joint_var."""
        return np.divide(self.sync_noscale, self.joint_var)

    @property
    def coeff_disr(self):
        """Disrelation coefficient (pg 79): async_noscale / joint_var.

        Not the same as np.sqrt(1 - coeff_corr**2); only equal in magnitude.
        """
        return np.divide(self.async_noscale, self.joint_var)

    @property
    def joint_var(self):
        """Outer product of the row-wise standard deviations of the dynamic
        spectrum (s1 * s2)."""
        std = self.dyn_spec.std(axis=1)  # sigma(lambda)
        return np.outer(std, std)

    @property
    def _dynconjtranspose(self):
        """Conjugate transpose of the dynamic spectrum."""
        return np.conj(self.dyn_spec).transpose()

    # 2D Correlation Spectra
    # ----------------------
    @property
    def sync(self):
        """Synchronous correlation spectrum as a Spec2d.

        When scaling is enabled the unscaled spectrum is multiplied by
        ``joint_var**(-alpha) * abs(coeff_corr)**beta``.
        """
        matrixout = self.sync_noscale
        if self._scaled:
            matrixout = (matrixout
                         * self.joint_var ** (-1.0 * self.alpha)
                         * abs(self.coeff_corr) ** self.beta)

        return Spec2d.from_corr2d(matrixout,
                                  corr2d=self,
                                  name='Synchronous Correlation',
                                  iunit='synchronicity')

    @property
    def async_(self):
        """Asynchronous correlation spectrum as a Spec2d.

        When scaling is enabled the unscaled spectrum is multiplied by
        ``joint_var**(-alpha) * abs(coeff_disr)**beta``.

        Also reachable under the legacy attribute name ``async`` (see the
        alias registered right after this property).
        """
        matrixout = self.async_noscale
        if self._scaled:
            matrixout = (matrixout
                         * self.joint_var ** (-1.0 * self.alpha)
                         * abs(self.coeff_disr) ** self.beta)

        return Spec2d.from_corr2d(matrixout,
                                  corr2d=self,
                                  name='Asynchronous Correlation',
                                  iunit='asynchronicity')

    # ``async`` became a reserved word in Python 3.7, so ``def async`` no
    # longer parses.  Register the legacy name through the class namespace so
    # ``obj.async`` (Python 2) and ``getattr(obj, 'async')`` keep working.
    locals()['async'] = async_

    @property
    def phase(self):
        """Global phase angle (pg 79): arctan(async / sync).

        Uses the (possibly scaled) ``sync`` and ``async`` spectra.
        """
        phase = np.arctan(self.async_ / self.sync)
        phase.name = 'Phase Map'
        phase.iunit = 'phase angle'
        return phase

    @property
    def modulous(self):
        """Length of the vector with components sync/async:
        sqrt(sync**2 + async**2)."""
        modulous = np.sqrt(self.sync ** 2 + self.async_ ** 2)
        modulous.name = 'Modulous'
        modulous.iunit = 'mod'
        return modulous

    @property
    def correlation(self):
        """Correlation coefficient wrapped as a Spec2d."""
        return Spec2d.from_corr2d(self.coeff_corr,
                                  corr2d=self,
                                  name='Correlation Coefficient',
                                  iunit='corr. coefficient')

    @property
    def disrelation(self):
        """Disrelation coefficient wrapped as a Spec2d."""
        return Spec2d.from_corr2d(self.coeff_disr,
                                  corr2d=self,
                                  name='Disrelation Coefficient',
                                  iunit='disr. coefficient')

    # 2DCodistribution Spectroscopy
    # -----------------------------
    @property
    def char_index(self):
        """Characteristic index (the original docstring cites Ref. [2], eq. 6).

        Returns a spectrum with one value per row of the data.

        Raises
        ------
        CorrError
            If the reference spectrum is entirely zero (un-centered data),
            since the result divides by the reference spectrum.
        """
        m = self.M

        if np.count_nonzero(self.ref_spectrum) == 0:
            raise CorrError('CoDistribution divides by ref spectrum.  If'
                            ' not centring, the ref spec is 0 and you get infinities!')

        coeff = 1.0 / (m * self.ref_spectrum)
        # 2.0 keeps the offset a true division under Python 2 as well.
        offset = (m + 1) / 2.0

        # NOTE(review): each term dots the dynamic spectrum with a constant
        # vector filled with k and adds the offset inside the scaled sum.
        # This looks different from a k-weighted sum over columns followed by
        # a single offset -- confirm against eq. 6 of Ref. [2] before relying
        # on these values.  The arithmetic is kept as originally written.
        summation = 0
        k_matrix = np.empty(m)
        for k in range(1, m + 1):  # m + 1 so that k == m is included
            k_matrix.fill(k)  # in place
            summation += self.dyn_spec.dot(k_matrix) + offset

        return coeff * summation

    @property
    def char_perturb(self):
        """Characteristic index mapped linearly onto the column range:
        ``(tm - t1) * (char_index - 1) / (M - 1) + t1`` where t1/tm are the
        first/last column labels."""
        tm, t1 = self.columns[-1], self.columns[0]
        Kj = self.char_index
        return ((tm - t1) * ((Kj - 1) / (self.M - 1))) + t1

    @property
    def async_codist(self):
        """Asynchronous codistribution spectrum as a Spec2d.

        Element (i, j) is ``(tbar[j] - tbar[i]) / (tm - t1) * joint_var[i, j]``
        with ``tbar = char_perturb``.
        """
        tm, t1 = self.columns[-1], self.columns[0]
        tbar = np.asarray(self.char_perturb.values)

        # Broadcasting builds the full matrix of tbar[j] - tbar[i] at once.
        coeff = (tbar[np.newaxis, :] - tbar[:, np.newaxis]) / (tm - t1)
        matrixout = coeff * self.joint_var

        return Spec2d.from_corr2d(matrixout,
                                  corr2d=self,
                                  name='Asynchronous Codistribution',
                                  iunit='asynchronicity')

    @property
    def sync_codist(self):
        """Synchronous codistribution spectrum as a Spec2d, computed from the
        asynchronous one as ``sqrt(joint_var**2 - async_codist**2)``."""
        var = self.joint_var
        async_cod = self.async_codist.values
        matrixout = np.sqrt(var ** 2 - async_cod ** 2)

        return Spec2d.from_corr2d(matrixout,
                                  corr2d=self,
                                  name='Synchronous Codistribution',
                                  iunit='synchronicity')

    def plot(self, **pltkwargs):
        """Quad plot of several correlation spectra; delegates to
        ``corr_multi``."""
        return corr_multi(self, **pltkwargs)

    def _pcagate(self, attr):
        """Raise CorrError if a PCA accessor is used before ``pca_fit``."""
        if not self._PCA:
            # The tuple must be parenthesised, otherwise "%" only receives
            # the class name and formatting fails.
            raise CorrError('Please run .pca_fit() method before '
                            'calling %s.%s' % (self.__class__.__name__, attr))

    def pca_fit(self, n_components=None, fit_transform=True):
        """Fit a PCA object to the dynamic spectrum.

        Adaptation of Alexis Mignon's pca.py script, adapted to fit skspec
        5/6/2013 (https://code.google.com/p/pypca/source/browse/trunk/PCA.py).

        Parameters
        ----------
        n_components : int, optional
            Passed to ``PCA(n_components=...)``.
        fit_transform : bool
            If True, return ``PCA.fit_transform(dyn_spec)``; otherwise call
            ``PCA.fit(dyn_spec)`` and return None.

        Notes
        -----
        Changes to the data do not retrigger a PCA refresh; call this method
        again after changing the data or its centering.
        """
        # The message says the data is not mean centered, so only warn when
        # that is actually the case.
        if self.center != 'mean':
            logger.warning('Builtin PCA will perform mean-centering on'
                           ' data.  Data is not mean centered yet.')

        self._PCA = PCA(n_components=n_components)
        if fit_transform:
            return self._PCA.fit_transform(self.dyn_spec)
        self._PCA.fit(self.dyn_spec)

    @property
    def PCA(self):
        """Return the full PCA object (requires ``pca_fit``)."""
        self._pcagate('pca')
        return self._PCA

    @property
    def pca_evals(self):
        """Eigenvalues stored on the fitted PCA object."""
        self._pcagate('eigen values')
        return self._PCA.eigen_values_

    @property
    def pca_evecs(self):
        """Eigenvectors stored on the fitted PCA object."""
        self._pcagate('eigen vectors')
        return self._PCA.eigen_vectors_

    def load_vec(self, k):
        """Return column ``k`` of the PCA eigenvector matrix.

        If the fitted PCA was limited to fewer components than ``k``, the PCA
        is refit with ``n_components=k`` (a warning is logged).

        Raises
        ------
        CorrError
            If ``pca_fit`` has not been run, or ``k`` exceeds the number of
            columns of the data.
        """
        self._pcagate('load_vec')

        # shape[1] is already an int; the original called len() on it.
        if k > self.shape[1]:
            raise CorrError('Principle components must be <= number '
                            'of samples %s' % self.shape[1])

        # If k > currently stored eigenvectors, recompute the PCA.
        if self._PCA._k:
            if k > len(self.pca_evals):
                logger.warning('Refitting, only %s components were computed '
                               'originally' % self._PCA._k)
                self.pca_fit(n_components=k, fit_transform=False)

        return self._PCA.eigen_vectors_[:, k]

    def __repr__(self):
        """Aligned summary: class, shape, address, centering, scaling, units."""
        pad = pvconfig.PAD
        address = super(Corr2d, self).__repr__().split()[-1].strip("'").strip('>')

        outstring = '%s (%s X %s) at %s:\n' % (self.__class__.__name__,
                                              self.shape[0],
                                              self.shape[1],
                                              address)

        # Centering
        outstring += '%sCentering -->  %s\n' % (pad, self.center)

        # Scaling; _scale_string already yields 'False' when unscaled
        outstring += '%sScaled    -->  %s\n' % (pad, self._scale_string)

        outstring += '%sUnits     -->  [%s X %s]' % (pad,
                                                    self.specunit.lower(),
                                                    self.varunit.lower())
        return outstring
class Corr2d(object):
    """2D correlation spectra computed from a 2D data matrix.

    Holds the data matrix together with its index and columns (both are read
    by ``plot``) and derives synchronous, asynchronous, correlation,
    disrelation and phase-angle matrices from it on access.
    """

    def __init__(self, data, index, columns, idx_unit='index', col_unit='col',
                 centered=False):
        """Store data and labels.

        Parameters
        ----------
        data : 2D array-like with ``ndim``
            Data matrix (rows follow ``index``, columns follow ``columns``).
        index, columns : sequence
            Row and column labels.
        idx_unit, col_unit : str
            Unit labels used in plot annotations and ``__repr__``.
        centered : bool or str
            False if ``data`` still needs centering, True if it is already
            centered, or a descriptive string (e.g. "max centered") which is
            stored as-is and also counts as centered.

        Raises
        ------
        CorrError
            If ``data`` is not two-dimensional.
        """
        if data.ndim != 2:
            raise CorrError('Data must be 2d Matrix.')

        self.data = data
        self.index = index
        self.columns = columns
        self.idx_unit = idx_unit
        self.col_unit = col_unit

        # Defaults
        self._scaled = False
        self._alpha = 0.8
        self._beta = 0.0
        self._PCA = None

        self._centered = False
        if centered:
            if centered == True:  # noqa: E712 -- keeps 1 equivalent to True
                self._centered = True
            else:
                self._centered = str(centered)  # user can say "max centered"

    def scale(self, *args, **kwargs):
        """Turn scaling of the sync/async spectra on or off and set exponents.

        Parameters
        ----------
        *args
            At most one positional flag: ``True`` enables scaling, ``False``
            disables it.  With no positional argument scaling is enabled.
        alpha, beta : float, optional (keywords)
            Exponents used by ``synchronous`` / ``asynchronous``; current
            values are kept when omitted.

        Raises
        ------
        CorrError
            If more than one positional argument is passed, or the flag is
            neither True nor False.
        """
        if args:
            if len(args) > 1:
                raise CorrError('Please use keywords (alpha=..., beta=...) to'
                                ' avoid ambiguity.')
            flag = args[0]
            if flag == True:  # noqa: E712
                if self._scaled == True:  # noqa: E712
                    logger.warning("Data already scaled!")
                self._scaled = True
            elif flag == False:  # noqa: E712
                if self._scaled == False:  # noqa: E712
                    logger.warning("Data already unscaled!")
                self._scaled = False
            else:
                raise CorrError('Argument "%s" not understood; please use '
                                'True or False.' % (flag,))
        else:
            self._scaled = True

        self._alpha = kwargs.pop('alpha', self._alpha)
        self._beta = kwargs.pop('beta', self._beta)

    def center(self, style='mean'):
        """Mean center the data in place.

        Each row has its own mean (taken over the columns) subtracted, i.e.
        the equivalent of ``dataframe.subtract(x.mean(axis=1), axis=0)``.
        This is done by transposing, subtracting the per-column mean of the
        transposed data and transposing back.

        If the data is already flagged as centered only a warning is logged.

        Raises
        ------
        NotImplementedError
            For any ``style`` other than 'mean'.
        """
        if self._centered:
            # Also hit when the user passed e.g. centered='max' to __init__
            logger.warning('Data is already centered.')
            return

        if style != 'mean':
            raise NotImplementedError('mean centering only supported')

        data_trans = self.data.transpose()
        # axis=0 is required: on an ndarray a bare .mean() is the global
        # scalar mean, which is not a row-wise centering.
        self.data = (data_trans - data_trans.mean(axis=0)).transpose()
        self._centered = True

    # Used internally; e.g. coeff_corr needs the unscaled spectrum
    @property
    def synchronous_noscale(self):
        """Unscaled synchronous spectrum: data . data^H / (m - 1)."""
        m = self.data.shape[1]  # columns
        # Order of operands matters: np.dot(data^H, data) is a different matrix.
        return np.dot(self.data, self._dynconjtranspose) / (m - 1.0)

    @property
    def asynchronous_noscale(self):
        """Unscaled asynchronous spectrum: data . (N . data^H) / (m - 1),
        with N the matrix returned by ``noda_matrix``."""
        m = self.data.shape[1]  # columns
        rotated = np.dot(self._noda, self._dynconjtranspose)
        return np.dot(self.data, rotated) / (m - 1.0)

    @property
    def _joint_std(self):
        """Outer product s1 * s2 of the row-wise standard deviations.

        ddof=1 matches the (m - 1) normalisation of the unscaled spectra, so
        for centered data the diagonal of ``coeff_corr`` is exactly 1.
        """
        std = np.std(np.asarray(self.data), axis=1, ddof=1)
        return np.outer(std, std)

    @property
    def synchronous(self):
        """Synchronous spectrum; when scaling is enabled it is multiplied by
        ``(s1*s2)**(-alpha) * abs(coeff_corr)**beta``."""
        if not self._scaled:
            return self.synchronous_noscale
        return (self.synchronous_noscale
                * self._joint_std ** (-1.0 * self._alpha)
                * abs(self.coeff_corr) ** self._beta)

    @property
    def asynchronous(self):
        """Asynchronous spectrum; when scaling is enabled it is multiplied by
        ``(s1*s2)**(-alpha) * abs(coeff_disr)**beta``."""
        if not self._scaled:
            return self.asynchronous_noscale
        return (self.asynchronous_noscale
                * self._joint_std ** (-1.0 * self._alpha)
                * abs(self.coeff_disr) ** self._beta)

    @property
    def coeff_corr(self):
        """Correlation coefficient (pg 78): synchronous_noscale / (s1*s2)."""
        return np.divide(self.synchronous_noscale, self._joint_std)

    @property
    def coeff_disr(self):
        """Disrelation coefficient (pg 79): asynchronous_noscale / (s1*s2).

        Not the same as np.sqrt(1 - coeff_corr**2); only equal in magnitude.
        """
        return np.divide(self.asynchronous_noscale, self._joint_std)

    @property
    def phase_angle(self):
        """Global phase angle (pg 79): arctan(asynchronous / synchronous).
        Uses scaled spectra when scaling is enabled."""
        return np.arctan(self.asynchronous / self.synchronous)

    @property
    def _noda(self):
        """Matrix from ``noda_matrix`` sized by the number of data columns."""
        return noda_matrix(self.shape[1])

    @property
    def _dynconjtranspose(self):
        """Conjugate transpose of the data."""
        return np.conj(self.data).transpose()

    def _default_title(self, attr_title):
        """Build the default plot title from the column range and unit."""
        cols = self.columns
        unit = self.col_unit.lower()
        try:
            return '%s (%.2f - %.2f %s)' % (attr_title, cols.min(),
                                            cols.max(), unit)
        except TypeError:
            # Non-numeric columns (e.g. timestamps) cannot use %.2f
            if unit == 'timestamp':  # bit of a hack
                return '%s (%s - %s)' % (attr_title, cols.min(), cols.max())
            return '%s (%s - %s %s)' % (attr_title, cols.min(),
                                        cols.max(), unit)

    def plot(self, attr='synchronous', sideplots='mean', annotate=True,
             **plotkwargs):
        """Visualize synchronous, asynchronous or phase angle spectra.

        Parameters
        ----------
        attr : str or 2D array-like
            'sync'/'synchronous', 'async'/'asynchronous' or
            'phase'/'phase_angle' to plot the matching spectrum.  Any
            non-string value is treated as a custom matrix (converted to a
            pandas DataFrame if it is not one already), e.g. the result of
            arithmetic on the spectra; index and titles are preserved.
        sideplots : str or bool ('mean')
            False for no sideplots.  True is treated as 'mean'.  'mean',
            'min', 'max' plot that row-wise statistic on the side axes;
            'empty' leaves the side axes blank.
        annotate : bool (True)
            Set default xlabel, ylabel and title (each can still be
            overridden through ``plotkwargs``).
        **plotkwargs
            Forwarded to the plotting helper (e.g. contours, cbar, colormap,
            grid, fill, xlabel, ylabel, title).

        Returns
        -------
        tuple of axes ``(ax1, ax2, ax3, ax4)`` when sideplots are used,
        otherwise the first element returned by ``_gen2d``.

        Raises
        ------
        CorrError
            For an unknown ``attr`` or ``sideplots`` keyword, or a custom
            matrix that cannot be converted to a DataFrame.
        """
        if not isinstance(attr, str):
            attr_title = 'Custom'
            # A DataFrame is needed for the mean/min/max sideplots
            if isinstance(attr, pandas.DataFrame):
                data = attr
            else:
                try:
                    data = pandas.DataFrame(attr)  # index/columns not set
                except Exception:
                    raise CorrError('Could not convert data of type %s to '
                                    'DataFrame.  This is used for various '
                                    'subroutines including sideplot mean/max.'
                                    % type(attr))
        elif attr in ('sync', 'synchronous'):
            attr_title = 'Synchronous'
            data = self.synchronous
        elif attr in ('async', 'asynchronous'):
            attr_title = 'Asynchronous'
            data = self.asynchronous
        elif attr in ('phase', 'phase_angle'):
            attr_title = 'Phase Angle'
            data = self.phase_angle
        else:
            raise CorrError('Valid plots include "sync", "async", "phase". '
                            'Alternatively, pass a custom matrix.')

        linekwds = dict(linewidth=1, linestyle='-', color='black')

        # Only set defaults for labels/title if annotate
        if annotate:
            plotkwargs.setdefault('xlabel', self.idx_unit)
            plotkwargs.setdefault('ylabel', self.idx_unit)
            plotkwargs.setdefault('title', self._default_title(attr_title))

        xx, yy = np.meshgrid(self.index, self.index)

        if not sideplots:
            return _gen2d(xx, yy, data, **plotkwargs)[0]  # axes, not contours

        if sideplots == True:  # noqa: E712
            sideplots = 'mean'

        # Validate before any axes are created
        if sideplots not in ('mean', 'max', 'min', 'empty'):
            raise CorrError('sideplots keyword must be "mean", "max", "min",'
                            ' or "empty".')

        # symbol is needed for both the centered and un-centered labels
        symbol = self.index._unit.symbol
        if self._centered:
            label1 = r'$\bar{A}(%s_1)$' % symbol
            label2 = r'$\bar{A}(%s_2)$' % symbol
        else:
            label1, label2 = r'$A(%s_1)$' % symbol, r'$A(%s_2)$' % symbol

        ax1, ax2, ax3, ax4 = _gencorr2d(xx, yy, data,
                                        label1, label2,
                                        **plotkwargs)

        if sideplots != 'empty':
            # 'mean' / 'max' / 'min' name the reduction method directly
            profile = getattr(data, sideplots)(axis=1)
            ax2.plot(self.index, profile, **linekwds)
            ax3.plot(self.index, profile, **linekwds)

        # Reorient ax3
        pvutil.invert_ax(ax3)

        if sideplots != 'empty':
            ax2.set_ylabel(sideplots)
            ax2.yaxis.set_label_position('right')

        return (ax1, ax2, ax3, ax4)

    @property
    def shape(self):
        """Shape of the data matrix."""
        return self.data.shape

    def _pcagate(self, attr):
        """Raise CorrError if a PCA accessor is used before ``pca_fit``."""
        if not self._PCA:
            # The tuple must be parenthesised, otherwise "%" only receives
            # the class name and formatting fails.
            raise CorrError('Please run .pca_fit() method before '
                            'calling %s.%s' % (self.__class__.__name__, attr))

    def pca_fit(self, n_components=None, fit_transform=True):
        """Fit a PCA object to the data.

        Adaptation of Alexis Mignon's pca.py script, adapted to fit PyUvVis
        5/6/2013 (https://code.google.com/p/pypca/source/browse/trunk/PCA.py).

        Parameters
        ----------
        n_components : int, optional
            Passed to ``PCA(n_components=...)``.
        fit_transform : bool
            If True, return ``PCA.fit_transform(data)``; otherwise call
            ``PCA.fit(data)`` and return None.

        Notes
        -----
        Changes to the data do not retrigger a PCA refresh; call this method
        again after changing the data.
        """
        if self._centered is not True:
            logger.warning('Builtin PCA will perform mean-centering on'
                           ' data.  Data is not mean centered yet.')

        self._PCA = PCA(n_components=n_components)
        if fit_transform:
            return self._PCA.fit_transform(self.data)
        self._PCA.fit(self.data)

    @property
    def PCA(self):
        """Return the full PCA object (requires ``pca_fit``)."""
        self._pcagate('pca')
        return self._PCA

    @property
    def pca_evals(self):
        """Eigenvalues stored on the fitted PCA object."""
        self._pcagate('eigen values')
        return self._PCA.eigen_values_

    @property
    def pca_evecs(self):
        """Eigenvectors stored on the fitted PCA object."""
        self._pcagate('eigen vectors')
        return self._PCA.eigen_vectors_

    def load_vec(self, k):
        """Return column ``k`` of the PCA eigenvector matrix.

        If the fitted PCA was limited to fewer components than ``k``, the PCA
        is refit with ``n_components=k`` (a warning is logged).

        Raises
        ------
        CorrError
            If ``pca_fit`` has not been run, or ``k`` exceeds the number of
            columns of the data.
        """
        self._pcagate('load_vec')

        # shape[1] is already an int; the original called len() on it.
        if k > self.shape[1]:
            raise CorrError('Principle components must be <= number '
                            'of samples %s' % self.shape[1])

        # If k > currently stored eigenvectors, recompute the PCA.
        if self._PCA._k:
            if k > len(self.pca_evals):
                logger.warning('Refitting, only %s components were computed '
                               'originally' % self._PCA._k)
                self.pca_fit(n_components=k, fit_transform=False)

        return self._PCA.eigen_vectors_[:, k]

    # Alternate constructors
    @classmethod
    def from_spectra(cls, ts, **kwargs):
        """Build a Corr2d from a spectra object.

        Uses ``np.array(ts)`` as data, ``ts.index`` / ``ts.columns`` as labels
        and ``ts.full_specunit`` / ``ts.full_varunit`` as default units.
        """
        kwargs.setdefault('idx_unit', ts.full_specunit)
        kwargs.setdefault('col_unit', ts.full_varunit)
        return cls(np.array(ts), ts.index, ts.columns, **kwargs)

    def __repr__(self):
        """Aligned summary: class, shape, address, centering, scaling, units."""
        pad = pvconfig.PAD
        address = super(Corr2d, self).__repr__().split()[-1].strip("'").strip('>')

        outstring = '%s (%s X %s) at %s:\n' % (self.__class__.__name__,
                                              self.shape[0],
                                              self.shape[1],
                                              address)

        # Centering
        outstring += '%sCentering -->  %s\n' % (pad, self._centered)

        # Scaling
        if self._scaled:
            outstring += '%sScaled    -->  %s (a=%s, b=%s)\n' % \
                (pad, self._scaled, self._alpha, self._beta)
        else:
            outstring += '%sScaled    -->  %s\n' % (pad, self._scaled)

        outstring += '%sUnits     -->  [%s X %s]' % (pad,
                                                    self.idx_unit.lower(),
                                                    self.col_unit.lower())
        return outstring
class Corr2d(object): """ Computed 2d correlation spectra, including synchronous and asynchronus, correlation, disrelation and other spectra given a 2d data matrix, index and columns. Index and columns are necessary for plotting, so made them a mandatory requirement.""" # Columns aren't used; should I eliminate def __init__(self, data, index, columns, idx_unit='index', col_unit='col', centered=False): """ """ if data.ndim != 2: raise CorrError('Data must be 2d Matrix.') # Array typecheck? self.data = data self.index = index #Relax these maybe and just hide some sideplots... self.columns = columns self.idx_unit = idx_unit self.col_unit = col_unit # Defaults self._scaled = False self._alpha = 0.8 self._beta = 0.0 self._PCA = None self._centered = False if centered: if centered == True: self._centered = True else: self._centered = str(centered) #User can say "max centered" def scale(self, *args, **kwargs): """Scale the synchronous and asynchronous spectra via REF 2 based on generalized exponential parameters. Scaling alpha will enhance the fine detail of the correlations, but also the noise. Enhancing beta can screen the fine details and enhance the primary correlations. Alpha 0.8 and beta 0.0 are suggested as optimal tradeoff between fine correlation enhancement and low noise. """ if args: if len(args) > 1: raise CorrError('Please use keywords (alpha=..., beta=...) to' 'avoid ambiguity.') # Make that a custom exception if args[0] == True: if self._scaled == True: logger.warn("Data already scaled!") self._scaled = True elif args[0] == False: if self._scaled == False: logger.warn("Data already unscaled!") self._scaled = False else: raise CorrError( 'Argument "%s" not understood; please use True or False.') else: self._scaled = True self._alpha = kwargs.pop('alpha', self._alpha) self._beta = kwargs.pop('beta', self._beta) def center(self, style='mean'): #Just call mean centered? """ Mean centers data. 
Mean centering is defined columnwise, and while this can be down by a call to dataframe.subtract(x.mean(axis=1), axis=0), that requries a pandas dataframe method. Instead, we transpose the data, subtract the mean, then transpose again. (confirmed equivalent)""" if self._centered: logger.warn( 'Data is already centered.') #Better than a warning I think #In case user sets centered ot 'max' or something in __init__ else: if style == 'mean': # Alternate way, pandas dependent # self.data = self.data.subtract(self.data.mean(axis=1), axis=0) data_trans = self.data.transpose() self.data = (data_trans - data_trans.mean()).transpose() self._centered = True else: raise NotImplementedError('mean centering only supported') # Used internally; for example in calculation of coeff_corr, need unscaled @property def synchronous_noscale(self): """ """ m = self.data.shape[1] # columns return np.dot(self.data, self._dynconjtranspose) / ( m - 1.0 ) #ORDER OF OPERATIONS DEPENDENT (aka np.dot(t_dyn, dyn) doesn't work) @property def asynchronous_noscale(self): """ """ m = self.data.shape[1] # columns return np.dot(self.data, np.dot(self._noda, self._dynconjtranspose)) / (m - 1.0) @property def synchronous(self): """ """ if self._scaled: return self.synchronous_noscale * self.data.var(axis=1)**(-1.0 * self._alpha) * \ abs(self.coeff_corr)**(self._beta) # ** faster than np.power but abs and np.abs same else: return self.synchronous_noscale @property def asynchronous(self): """ """ if self._scaled: return self.asynchronous_noscale * self.data.var(axis=1)**(-1.0 * self._alpha) * \ abs(self.coeff_disr)**(self._beta) else: return self.asynchronous_noscale @property def coeff_corr(self): """ Correlation coefficient (pg 78) """ return np.divide(self.synchronous_noscale, self.data.std(axis=1)) @property def coeff_disr(self): """ Disrelation coefficient (pg 79) """ # Not the same as np.sqrt( 1 - coef_corr**2), only same in magnitude! 
return np.divide(self.asynchronous_noscale, self.data.std(axis=1)) @property def phase_angle(self): """ Global phase angle (pg 79). This will use scaled data.""" return np.arctan(self.asynchronous / self.synchronous) @property def _noda(self): """ Store noda matrix of data; depends of number of columns in data. """ return noda_matrix(self.shape[1]) @property def _dynconjtranspose(self): """ Dynamic spectrum conjugate transpose; helpful to be cached""" return np.conj(self.data).transpose() # Do I want xx, yy in here? def plot(self, attr='synchronous', sideplots='mean', annotate=True, **plotkwargs): """ Visualize synchronous, asynchronous or phase angle spectra. Parameters ---------- attr: str attribute (e.g. 'synchronous') or numpy 2d array Select which correlation spectra to plot. Choose from 'sync', 'async' or 'phase' for synchronous, asynchronous and phase_angle matricies. In addition, can pass a custom matrix. This is mainly for use case of plotting arithmetic operaitons on sync, asynch and other matricies. For example, if one wants to plot the squared synchronous spectrum, they can square the matrix, pass it back into this plotting funciton, and the index, titles and so forth will all be preserved. See examples/documention. contours: int (20) Number of contours to display. sideplots: str or bool ('mean') If True, sideplots will be put on side axis of cross plots. Use 'empty' to return blank sideplots. mean', 'min', 'max', will plot these respective spectra on the sideplots. annotate: bool (True) Adds some default title and x/y labels and text to plot. Setting false is shortcut to removing them all cbar : str or bool (False) Add a colorbar to the plot. Set cbar to 'top', 'bottom', 'left' or 'right' to control position. colormap : str or bool ('jet') Color map to apply to the contour plot. grid : bool (True) Apply a grid to the contour and sideplots fill : bool (True) Contours are lines, or filled regions. 
**plotkwargs: dict Any valid matplotlib contour plot keyword, as well as xlabel, ylabel and title for convenience. Returns ------- tuple (matplotlib.Axes) If side plots, returns (ax1, ax2, ax3, ax4) If not side plots, returns ax4 only """ # if user passes matrix instead of a string if not isinstance(attr, str): attr_title = 'Custom' # Need to dataframe convert or mean/min/max syntax won't work (sideplots) if not isinstance(attr, pandas.DataFrame): try: data = pandas.DataFrame(attr) #Don't set index/columns; except Exception: raise CorrError('Could not convert data of type %s to ' 'DataFrame. This is used for various' 'subroutins including sideplot mean/max.') elif attr in ['sync', 'synchronous']: attr_title = 'Synchronous' #For plot data = getattr(self, 'synchronous') elif attr in ['async', 'asynchronous']: attr_title = 'Asynchronous' #For plot data = getattr(self, 'asynchronous') elif attr in ['phase', 'phase_angle']: data = getattr(self, 'phase_angle') attr_title = 'Phase Angle' #For plot else: # Make better raise Corr2d('Valid plots include "sync", "async", "phase".' 'Alternatively, pass a custom matrix.') linekwds = dict(linewidth=1, linestyle='-', color='black') # Only set defaults for labels/title if annotate if annotate: plotkwargs.setdefault('xlabel', self.idx_unit) plotkwargs.setdefault('ylabel', self.idx_unit) # Title cols = self.columns try: plotkwargs.setdefault( 'title', '%s (%.2f - %.2f %s)' % (attr_title, cols.min(), cols.max(), self.col_unit.lower())) # Working with timestamps (leave in year?) 
except TypeError: if self.col_unit.lower() == 'timestamp': #Bit of a hack plotkwargs.setdefault( 'title', '%s (%s - %s)' % ( attr_title, #str(cols.min()).split()[1], #Cut out year #str(cols.max()).split()[1]) cols.min(), cols.max())) # Full string format, not alteration of timestamp values else: plotkwargs.setdefault( 'title', '%s (%s - %s %s)' % (attr_title, cols.min(), cols.max(), self.col_unit.lower())) # MAKE A DICT THAT RENAMES THESE synchronous: Synchronous Spectrm # phase_angle or 'phase' or w/e to: "Phase Anlge" (sans spectrum) xx, yy = np.meshgrid(self.index, self.index) if sideplots: if sideplots == True: sideplots = 'mean' symbol = self.index._unit.symbol if self._centered: label1 = r'$\bar{A}(%s_1)$' % symbol label2 = r'$\bar{A}(%s_2)$' % symbol else: label1, label2 = r'$A(%s_1)$' % symbol, r'$A(%s_2)$' % symbol ax1, ax2, ax3, ax4 = _gencorr2d(xx, yy, data, label1, label2, **plotkwargs) # Problem here: this is calling plot method of if sideplots == 'mean': ax2.plot(self.index, data.mean(axis=1), **linekwds) ax3.plot(self.index, data.mean(axis=1), **linekwds) elif sideplots == 'max': ax2.plot(self.index, data.max(axis=1), **linekwds) ax3.plot(self.index, data.max(axis=1), **linekwds) elif sideplots == 'min': ax2.plot(self.index, data.min(axis=1), **linekwds) ax3.plot(self.index, data.min(axis=1), **linekwds) elif sideplots == 'empty': pass else: raise Corr2d('sideplots keyword must be "mean", "max", "min",' ' or "empty".') # Reorient ax3 pvutil.invert_ax(ax3) if sideplots != 'empty': ax2.set_ylabel(sideplots) ax2.yaxis.set_label_position('right') return (ax1, ax2, ax3, ax4) else: # If no sideplots, can allow for 3d plots plotkwargs.setdefault('kind', 'contour') return _gen2d3d(xx, yy, data, **plotkwargs)[0] #return axes, not contours @property def shape(self): return self.data.shape def _pcagate(self, attr): """ Raise an error if use calls inaccessible PCA method.""" if not self._PCA: raise CorrError( 'Please run .pca_fit() method before ' 'calling 
%s.%s' % self.__class__.__name__, attr) def pca_fit(self, n_components=None, fit_transform=True): # k=None, kernel=None, extern=False): """ Adaptation of Alexis Mignon's pca.py script Adapted to fit PyUvVis 5/6/2013. Original credit to Alexis Mignon: Module for Principal Component Analysis. Author: Alexis Mignon (c) Date: 10/01/2012 e-mail: [email protected] (https://code.google.com/p/pypca/source/browse/trunk/PCA.py) Constructor arguments: * k: number of principal components to compute. 'None' (default) means that all components are computed. * kernel: perform PCA on kernel matrices (default is False) * extern: use extern product to perform PCA (default is False). Use this option when the number of samples is much smaller than the number of features. See pca.py constructor for more info. This will initialize PCA class and fit current values of timespectra. Notes: ------ The pcakernel.py module is more modular. These class methods make it easier to perform PCA on a timespectra, but are less flexible than using the module functions directly. timespectra gets transposed as PCA module expects rows as samples and columns as features. Changes to timespectra do not retrigger PCA refresh. This method should be called each time changes are made to the data. """ if self._centered != True: logger.warn('Builtin PCA will perform mean-centering on' ' data. 
Data is not mean centered yet.') self._PCA = PCA(n_components=n_components) if fit_transform: return self._PCA.fit_transform(self.data) #.transpose()) else: self._PCA.fit(self.data) #.transpose()) @property def PCA(self): """ Return the full PCA class object""" self._pcagate('pca') return self._PCA @property def pca_evals(self): self._pcagate('eigen values') # Index is not self.columns because eigenvalues are still computed with # all timepoints, not a subset of the columns return self._PCA.eigen_values_ @property def pca_evecs(self): self._pcagate('eigen vectors') return self._PCA.eigen_vectors_ def load_vec(self, k): """ Return loading vector series for k. If k > number of components computed with runpca(), this raises an error rather than recomputing. """ self._pcagate('load_vec') if k > len(self.shape[1]): raise CorrError('Principle components must be <= number' 'of samples %s' % self.shape[1]) # Decided to put impetus on user to recompute when not using enough principle components # rather then trying to figure out logic of all use cases. 
# If k > currently stored eigenvectors, recomputes pca if self._PCA._k: if k > len(self.pca_evals): logger.warn('Refitting, only %s components were computed' 'originally' % self._PCA._k) self.pca_fit(n_components=k, fit_transform=False) return self._PCA.eigen_vectors_[:, k] # Alternate constructers @classmethod def from_spectra(cls, ts, **kwargs): kwargs.setdefault('idx_unit', ts.full_specunit), kwargs.setdefault('col_unit', ts.full_varunit), return cls(np.array(ts), ts.index, ts.columns, **kwargs) def __repr__(self): """ Aligned columns like pyparty.multicanvas """ pad = pvconfig.PAD address = super(Corr2d, self).__repr__().split()[-1].strip("'").strip('>') outstring = '%s (%s X %s) at %s:\n' % ( self.__class__.__name__, self.shape[0], self.shape[1], address) #Units # outstring += '%sUnits --> %s X %s\n' % (pad, self.idx_unit.lower(), self.col_unit.lower()) #Centering outstring += '%sCentering --> %s\n' % (pad, self._centered) #Scaling if self._scaled: outstring += '%sScaled --> %s (a=%s, b=%s)\n' % \ (pad, self._scaled, self._alpha, self._beta) else: outstring += '%sScaled --> %s\n' % (pad, self._scaled) outstring += '%sUnits --> [%s X %s]' % ( pad, self.idx_unit.lower(), self.col_unit.lower()) return outstring