return 'Formate signals in one DataFrame for computing' # If input signals are multivariates, only the first column is considered x = pd.DataFrame() for i in range(0, len(signals)): if x.empty: x = pd.DataFrame(signals[i].iloc[:, 0], signals[i].index) x.columns = [signals[i].columns[0]] else: x[signals[i].columns[0]] = signals[i].iloc[:, 0] ''' Ignore last value if len(x) is odd (avoiding trouble with fft<=>ifft)''' if (x.shape[0] % 2 != 0): x = x.iloc[0:x.shape[0] - 1, :] ''' Standardize ''' X = Standardize.Standardize(x) ''' Correlation matrix ''' # division by zero already checked C = np.dot(X.values.T, X.values) / float( x.index.size) # on original signal ''' Eigenvalues decomposition ''' eig_values, eig_vectors = np.linalg.eig(C) # on original signal eig_values = np.sort(eig_values) # sort by increasing order ''' Surrogate signal ''' df_X_surr, X_surr_average_eig = self.refined_AAFT_surrogate(X) # Phil - store sums to avoid recompute for checking zeroes and later on sum_eig_values = sum(eig_values) sum_X_surr_average_eig = sum(X_surr_average_eig) # check division by zero
class CCA(Method):
    """
    Extract the highest correlations possible between projections of the
    features of the two datasets.

    Both datasets must contain the same number of rows (individuals or
    samples) but may contain different numbers of features (columns).

    Returns the weights of the projections and the correlations that come
    with those weights.
    """
    argsList = MethodArgList()
    argsList.append('xData_filename', '', file, 'First data set in cvs format')
    argsList.append('yData_filename', '', file, 'Second data set in cvs format')
    argsList.append('nbr_correlations', 0, int, 'number of maximised correlations wanted')
    argsList.append('standerdized', False, bool, 'are the two datasets centered and reduced')

    def __init__(self, plot=False, nbr_correlations=0, standerdized=False,
                 xData=None, yData=None, xData_filename=None,
                 yData_filename=None, **kwargs):
        ' Init '
        super(CCA, self).__init__(plot, **kwargs)

        # Each dataset comes either as an in-memory DataFrame or as a csv
        # file object; a missing DataFrame requires a usable file.
        if xData is None:
            if not (xData_filename and isinstance(xData_filename, file)) \
                    or len(xData_filename.name) == 0:
                raise TypeError("Requires xData_filename to be a file")
        if yData is None:
            if not (yData_filename and isinstance(yData_filename, file)) \
                    or len(yData_filename.name) == 0:
                raise TypeError("Requires yData_filename to be a file")

        if not isinstance(nbr_correlations, int):
            raise TypeError("Requires m to be an integer")
        if not isinstance(standerdized, bool):
            raise TypeError("Requires center to be a boolean")

        # other rule for parameters
        if nbr_correlations < 0:
            raise ValueError("Requires m to be positive or greater than 0")

        # Load from csv when file objects were supplied.
        if isinstance(xData_filename, file):
            xData = pd.DataFrame.from_csv(xData_filename)
        if isinstance(yData_filename, file):
            yData = pd.DataFrame.from_csv(yData_filename)

        if not isinstance(xData, pd.DataFrame):
            raise TypeError("Requires xData to be a panda dataframe")
        if not isinstance(yData, pd.DataFrame):
            raise TypeError("Requires yData to be a panda dataframe")

        self._xData = xData
        self._yData = yData
        self._nbr_correlations = nbr_correlations
        self._standerdized = standerdized
        self.lx = None  # number of x features, set lazily in compute()
        self.ly = None  # number of y features, set lazily in compute()
        return

    def vector_root(self, V):
        """Return a new float array with the square root applied elementwise.

        :param V: numpy vector (assumed non-negative entries)
        :returns: numpy array of the same size
        """
        return np.sqrt(np.array(V, float))

    def inv_vector_root(self, V):
        """Return a new float array with x -> 1/sqrt(x) applied elementwise.

        Non-positive entries map to 0 (pseudo-inverse convention for
        degenerate eigenvalues).

        BUG FIX: the original wrote the 0 into the *input* vector V and left
        the corresponding output entry equal to the raw, un-inverted input
        value; the output entry is now correctly zeroed and the input is
        left untouched.

        :param V: numpy vector
        :returns: numpy array of the same size
        """
        U = np.array(V, float)
        for i in range(U.size):
            if U[i] > 0:
                U[i] = 1 / sqrt(U[i])
            else:
                U[i] = 0.0
        return U

    def cov(self, A, B):
        """Unbiased cross-covariance estimate (1/(n-1)) * A^T B.

        :param A: (n, p) data matrix (assumed column-centred — TODO confirm
            against Standardize)
        :param B: (n, q) data matrix
        :returns: (p, q) covariance matrix
        """
        n_rows = A.shape[0]
        return np.dot(A.T, B) / float(n_rows - 1)

    def SVD_CCA(self, S_X, S_Y, S_XY, m):
        """Canonical correlation analysis via SVD of the whitened
        cross-covariance matrix.

        :param S_X: covariance matrix of X
        :param S_Y: covariance matrix of Y
        :param S_XY: cross-covariance matrix of X and Y
        :param m: number of canonical components to keep
        :returns: tuple (D, B, A) — correlations D, y-side weights B and
            x-side weights A, each truncated to the first m components
        """
        # Whitening matrix CX ~ S_X^(-1/2) via eigendecomposition.
        D, v_x = eig(S_X)
        CX = np.dot(np.dot(v_x, diag(self.inv_vector_root(D))), inv(v_x))

        # Whitening matrix CY ~ S_Y^(-1/2).
        D, v_y = eig(S_Y)
        CY = np.dot(np.dot(v_y, diag(self.inv_vector_root(D))), inv(v_y))

        # SVD of the whitened cross-covariance yields the canonical pairs.
        omega = np.dot(np.dot(CX, S_XY), CY)
        U, D, T = svd(omega, full_matrices=0)

        A = np.dot(CX, U)[:, :m]
        B = np.dot(CY, T.T)[:, :m]
        # NOTE(review): the original takes the square root of the singular
        # values here; kept as-is to preserve behavior.
        D = self.vector_root(D[0:m])
        return D, B, A

    def compute(self, signals):
        """Compute CCA between the two datasets given to the constructor.

        :param signals: unused; the data comes from the constructor
        :returns: dict with keys 'corr' (canonical correlations),
            'xWeights' and 'yWeights' (projection weights)
        :raises TypeError: if the stored datasets are not DataFrames
        """
        x = self._xData
        y = self._yData

        ' Raise error if parameters are not in the correct type '
        if not isinstance(x, pd.DataFrame):
            raise TypeError("Requires x to be a pd.DataFrame")
        if not isinstance(y, pd.DataFrame):
            raise TypeError("Requires y to be a pd.DataFrame")

        if not self._standerdized:
            x = Standardize.Standardize(x)
            y = Standardize.Standardize(y)

        x = x.values
        y = y.values

        # Default: keep as many correlations as the smaller feature count.
        if self._nbr_correlations == 0:
            self.lx = x.shape[1]
            self.ly = y.shape[1]
            self._nbr_correlations = min(self.lx, self.ly)

        cov_x = self.cov(x, x)
        cov_y = self.cov(y, y)
        cov_xy = self.cov(x, y)
        D, A, B = self.SVD_CCA(cov_x, cov_y, cov_xy, self._nbr_correlations)

        res = {}
        res['corr'] = D
        res['yWeights'] = A
        res['xWeights'] = B
        return res
except TypeError, err_msg: raise TypeError(err_msg) return ' Raise error if m and t are too big to do embedding ' try: if ((x.shape[0] - self.t * (self.m - 1)) < 1) or ((y.shape[0] - self.t * (self.m - 1) < 1)): raise ValueError("m or t values are too big") except ValueError, err_msg: raise ValueError(err_msg) return if self.standardization == True: x = Standardize.Standardize(x) y = Standardize.Standardize(y) if (self.m != 1) or (self.t != 1): x = self.embedding(x) y = self.embedding(y) vd = 2 if (self.distance == 'euclidean'): pass elif (self.distance == 'manhattan'): vd = 1 elif (self.distance == 'maximum'): vd = np.inf crp_tmp = np.zeros((x.shape[0], y.shape[0]))
) if self.tau_max > self.ly: raise Warning( "the value -(length y -1) will be used as -tau_max") except Warning, war_msg: raise Warning(war_msg) if self.standardization == False: self.corr_f_full = np.correlate(x.iloc[:, 0], y.iloc[:, 0], mode='full', old_behavior=False) self.corr_f = self.corr_f_full[start:stop + 1] else: x_std = Standardize.Standardize(x) y_std = Standardize.Standardize(y) self.corr_f_full = np.correlate(x_std.iloc[:, 0], y_std.iloc[:, 0], mode='full', old_behavior=False) self.corr_f = self.corr_f_full[start:stop + 1] if self.scale == True: nx = np.linalg.norm(x.values, 2) ny = np.linalg.norm(y.values, 2) self.corr_f = self.corr_f_full[start:stop + 1] / (nx * ny) res_corr = {} res_corr['corr_funct'] = self.corr_f
def compute(self, *signals):
    """
    Computes SSI for multiple monovariate signals (organized as a list).
    If input signals are multivariates, only the first column of the
    signal is considered.

    :param signals: list of signals, one per person.
    :type signals: list[pd.DataFrame]

    :returns: dict -- Synchronization indexes : S-Estimator (SSI),
        Genuine Synchronization Index (GSI) and Random Synchronization
        Index (RSI)
    """
    ' Raise error if parameters are not in the correct type '
    for idx, sig in enumerate(signals):
        if not isinstance(sig, pd.DataFrame):
            raise TypeError("Requires signal " + str(idx + 1) +
                            " to be a pd.DataFrame.")

    ' Raise error if DataFrames have not the same size or same indexes '
    for idx, sig in enumerate(signals):
        if len(signals[0]) != len(sig):
            raise ValueError(
                "All the signals must have the same size. Signal " +
                str(idx + 1) +
                " does not have the same size as first signal.")
        if signals[0].index.tolist() != sig.index.tolist():
            raise ValueError(
                "All the signals must have the same time indexes. Signal " +
                str(idx + 1) +
                " does not have the same time index as first signal.")

    # Guard against an empty argument list: the index size of the merged
    # frame appears later as a divisor.
    if not signals:
        raise ValueError(
            "len(signals) can't be 0 because we divide by the size x's index that depends on it"
        )

    'Formate signals in one DataFrame for computing'
    # Only the first column of each (possibly multivariate) signal is kept.
    # NOTE(review): duplicate column names across signals would overwrite
    # each other here — presumably callers use distinct names; verify.
    merged = pd.DataFrame()
    for sig in signals:
        if merged.empty:
            merged = pd.DataFrame(sig.iloc[:, 0], sig.index)
            merged.columns = [sig.columns[0]]
        else:
            merged[sig.columns[0]] = sig.iloc[:, 0]

    # Drop the last sample when the length is odd (avoids trouble with
    # the fft<=>ifft round trip used by the surrogate generation).
    if merged.shape[0] % 2:
        merged = merged.iloc[:-1, :]

    ''' Standardize '''
    Z = Standardize.Standardize(merged)

    ''' Correlation matrix '''
    # Division by zero already ruled out above (signals is non-empty).
    corr_mat = np.dot(Z.values.T, Z.values) / float(merged.index.size)

    ''' Eigenvalues decomposition (on the original signal) '''
    eig_values, _ = np.linalg.eig(corr_mat)
    eig_values = np.sort(eig_values)  # increasing order

    ''' Surrogate signal and its average eigenvalues '''
    df_X_surr, surr_eig = self.refined_AAFT_surrogate(Z)

    # Pre-computed sums: reused both for the zero checks and the
    # normalizations below, avoiding recomputation.
    total_eig = sum(eig_values)
    total_surr_eig = sum(surr_eig)

    ' Raise error if any of the later divisors is zero '
    if np.any(total_eig == 0):
        raise ValueError(
            "The Sum of eig_values can't be 0 because we divide by it later"
        )
    if np.any(surr_eig == 0):
        raise ValueError(
            "X_surr_average_eig can't be 0 because we divide by it later")
    if np.any(total_surr_eig == 0):
        raise ValueError(
            "The Sum of X_surr_average_eig can't be 0 because we divide by it later"
        )

    total_ratio = sum(eig_values / surr_eig)
    if np.any(total_ratio == 0):
        raise ValueError(
            "The Sum of (eig_values/X_surr_average_eig) can't be 0 because we divide by it later"
        )

    ''' Get Synchronization Indexes '''
    n = surr_eig.size
    lambda_1 = [eig_values[k] / total_eig for k in range(n)]
    lambda_2 = [(eig_values[k] / surr_eig[k]) / total_ratio
                for k in range(n)]
    lambda_3 = [surr_eig[k] / total_surr_eig for k in range(n)]

    SI = dict()
    SI['SSI'] = self.getSynchronizationIndex(lambda_1)
    SI['GSI'] = self.getSynchronizationIndex(lambda_2)
    SI['RSI'] = self.getSynchronizationIndex(lambda_3)
    SI['surrogate_signal'] = df_X_surr

    if self._plot:
        plt.ion()
        self.plot_result(SI)

    return SI