def rdc1(x, y, k=10, s=0.2):
    """Estimate the randomized dependence coefficient between ``x`` and ``y``.

    Every column is rank-transformed, projected through ``k`` random
    Gaussian directions, passed through ``sin``, and the correlation of the
    first pair of canonical variates is computed.

    Args:
        x: Array of shape ``(n,)`` or ``(n, p)``.
        y: Array of shape ``(n,)`` or ``(n, q)``.
        k: Number of random projections drawn for each input.
        s: Scale applied to the random projection weights.

    Returns:
        Correlation between the first canonical variates. The value is also
        printed, as the original implementation did.
    """
    if len(x.shape) == 1:
        x = x.reshape((-1, 1))
    if len(y.shape) == 1:
        y = y.reshape((-1, 1))

    # Per-column ordinal ranks, divided by the total element count.
    cx = np.column_stack(
        [rankdata(xc, method='ordinal') for xc in x.T]) / float(x.size)
    cy = np.column_stack(
        [rankdata(yc, method='ordinal') for yc in y.T]) / float(y.size)

    # Add a vector of ones so that w.x + b is just a dot product
    O = np.ones(cx.shape[0])
    X = np.column_stack([cx, O])
    Y = np.column_stack([cy, O])

    # Draw order (x first, then y) is kept so seeded runs stay reproducible.
    Rx = (s / X.shape[1]) * np.random.randn(X.shape[1], k)
    Ry = (s / Y.shape[1]) * np.random.randn(Y.shape[1], k)

    # The original called np.sin() without an argument, which raises
    # TypeError; the non-linearity belongs on both projected feature sets.
    X = np.sin(np.dot(X, Rx))
    Y = np.sin(np.dot(Y, Ry))

    cca = CCA(n_components=1)
    xc, yc = cca.fit_transform(X, Y)
    result = np.corrcoef(xc.T, yc.T)[0, 1]
    print(result)
    return result
def get_cca(X, Y, n_comp=10):
    """Fit CCA, print diagnostic correlations, and return the scores.

    The scores returned by ``fit_transform`` are compared against a manual
    re-computation (standardise, then multiply by the rotation matrices);
    both sets of per-component correlations are printed.

    Args:
        X: First data matrix; rows are paired with the rows of ``Y``.
        Y: Second data matrix.
        n_comp: Number of canonical components to fit.

    Returns:
        Tuple ``(x_scores, y_scores)`` as returned by ``fit_transform``.
    """
    cca = CCA(n_components=n_comp)
    print("X.shape", X.shape)
    print("Y.shape", Y.shape)
    x_scores, y_scores = cca.fit_transform(X, Y)

    # Manual transform on new arrays. The original used `-=` and `/=`,
    # which silently overwrote the caller's X and Y.
    X_std = (X - cca.x_mean_) / cca.x_std_
    Y_std = (Y - cca.y_mean_) / cca.y_std_
    calc_scores_x = np.dot(X_std, cca.x_rotations_)
    calc_scores_y = np.dot(Y_std, cca.y_rotations_)

    print("x_scores.shape", x_scores.shape)
    print("y_scores.shape", y_scores.shape)

    # The upper-right block of the joint correlation matrix holds the X/Y
    # cross-correlations; its diagonal pairs component i with component i.
    correlations = np.diag(
        np.corrcoef(x_scores, y_scores, rowvar=False)[:n_comp, n_comp:])
    calc_correlations = np.diag(
        np.corrcoef(calc_scores_x, calc_scores_y,
                    rowvar=False)[:n_comp, n_comp:])
    print(correlations)
    print(calc_correlations)
    return x_scores, y_scores
def fbcca(eeg, list_freqs, fs, num_harms=3, num_fbs=5):
    """Predict one class index per trial with filter-bank CCA.

    Args:
        eeg: 3-D array; the first axis indexes trials and the last axis
            indexes samples (the middle axis is presumably channels).
        list_freqs: Candidate frequencies, one per class.
        fs: Sampling rate forwarded to ``cca_reference`` and ``filterbank``.
        num_harms: Number of harmonics requested from ``cca_reference``.
        num_fbs: Number of filter-bank sub-bands.

    Returns:
        Float array with one predicted class index per trial.
    """
    fb_coefs = np.power(np.arange(1, num_fbs + 1), (-1.25)) + 0.25
    num_trials, _, num_smpls = eeg.shape
    # Classes are defined by the frequency list. The original reused the
    # trial count here, which only works when there is exactly one trial
    # per class.
    num_classes = len(list_freqs)
    y_ref = cca_reference(list_freqs, fs, num_smpls, num_harms)
    cca = CCA(n_components=1)

    r = np.zeros((num_fbs, num_classes))
    results = np.zeros(num_trials)
    for targ_i in range(num_trials):
        test_tmp = np.squeeze(eeg[targ_i, :, :])
        for fb_i in range(num_fbs):
            testdata = filterbank(test_tmp, fs, fb_i)
            for class_i in range(num_classes):
                refdata = np.squeeze(y_ref[class_i, :, :])
                # Rows must be observations, hence the transposes.
                test_C, ref_C = cca.fit_transform(testdata.T, refdata.T)
                r_tmp, _ = pearsonr(np.squeeze(test_C), np.squeeze(ref_C))
                # A NaN correlation would otherwise win np.argmax below.
                if np.isnan(r_tmp):
                    r_tmp = 0
                r[fb_i, class_i] = r_tmp
        # Weighted sum over sub-bands, then pick the best-scoring class.
        rho = np.dot(fb_coefs, r)
        results[targ_i] = np.argmax(rho)
    return results
def test_cca():
    """Test CCA against reference weights stored in a MATLAB file."""
    mat = loadmat('./tests/data/ccadata.mat')
    x = mat['x']
    y = mat['y']
    A2 = mat['A2']
    B2 = mat['B2']

    A1, B1, R = nt_cca(x, y)
    X1 = np.dot(x, A1)
    Y1 = np.dot(y, B1)
    C1 = tscov(np.hstack((X1, Y1)))[0]

    # Sklearn CCA: only checked to run; its scores are not compared.
    # max_iter must be an integer count (the original passed the float 1e6).
    cca = CCA(n_components=9, scale=False, max_iter=1000000)
    cca.fit_transform(x, y)

    # Compare with matlab
    X2 = np.dot(x, A2)
    Y2 = np.dot(y, B2)
    C2 = tscov(np.hstack((X2, Y2)))[0]

    assert_almost_equal(C1, C2)
def compute_SVCCA(activation1, activation2):
    '''
    Mean canonical correlation between two PCA-reduced activation matrices.

    activation1 - Activation array 1 as a numpy array of size n X m1
    activation2 - Activation array 2 as a numpy array of size n X m2

    Both arrays are reduced with PCA to the same number of components
    (40, or fewer when an array has fewer columns), CCA is fitted on the
    reduced data, and the per-component correlations are averaged.
    '''
    n_components = 40  # value from Shi et al NeurIPS 2019
    n_rows = activation1.shape[0]
    assert n_rows == activation2.shape[
        0], "Size of activation arrays are different!!"

    width1 = activation1.shape[1]
    if width1 < n_components:
        print(
            "Activation 1 array has less neurons.. changing number of PCs to ",
            width1)
        n_components = width1
    width2 = activation2.shape[1]
    if width2 < n_components:
        print(
            "Activation 2 array has less neurons.. changing number of PCs to ",
            width2)
        n_components = width2

    reduced1 = PCA(n_components=n_components).fit_transform(activation1)
    reduced2 = PCA(n_components=n_components).fit_transform(activation2)

    model = CCA(n_components=n_components)
    scores1, scores2 = model.fit_transform(reduced1, reduced2)

    # [0, 1] is the off-diagonal entry of each 2x2 correlation matrix.
    per_component = np.array([
        np.corrcoef(scores1[:, col], scores2[:, col])[0, 1]
        for col in range(n_components)
    ])
    return np.mean(per_component)
def main(args):
    """Train an RBF SVR on CCA-projected features and write test predictions.

    Args:
        args: 8-tuple ``(training_file, label_file, test_file, u_file, e, c,
            output_file, components)``. ``e`` and ``c`` are the SVR epsilon
            and C values; ``components`` is the number of CCA components.

    Writes one prediction per line to ``output_file``.
    """
    (training_file, label_file, test_file, u_file, e, c, output_file,
     components) = args

    X_training = np.asarray(load_feat(training_file))
    U = np.asarray(load_feat(u_file))
    with open(label_file) as label_handle:
        y_training = np.asarray(
            [float(line.strip()) for line in label_handle])

    X_test = np.asarray(load_feat(test_file))
    X_test[np.isnan(X_test)] = 0.0
    # The original also read test labels from `test_label`, a name that is
    # not part of `args`; the labels were never used, so the read is dropped.

    # CCA needs row-paired views, so both sets are cut to the shorter one.
    s = min(len(X_training), len(U))
    cca = CCA(n_components=components, max_iter=50)
    (X_cca, U_cca) = cca.fit_transform(X_training[:s], U[:s])
    X_test_cca = cca.transform(X_test)

    svr = SVR(C=c, epsilon=e, kernel='rbf')
    svr.fit(X_cca, y_training[:s])
    pred = svr.predict(X_test_cca)

    with open(output_file, 'w') as output:
        for p in pred:
            # Equivalent to Python 2's `print >>output, p`, but also valid
            # on Python 3.
            output.write("%s\n" % p)
    return
def rdc_cca(indexes):
    """Return the first canonical correlation between two feature blocks.

    Args:
        indexes: Triple ``(i, j, rdc_features)``; ``rdc_features[i]`` and
            ``rdc_features[j]`` are the two matrices handed to CCA.

    Returns:
        Correlation between the first pair of canonical variates.
    """
    first, second, features = indexes
    model = CCA(n_components=1, max_iter=CCA_MAX_ITER)
    scores_a, scores_b = model.fit_transform(features[first],
                                             features[second])
    corr_matrix = np.corrcoef(scores_a.T, scores_b.T)
    return corr_matrix[0, 1]
def main(args):
    """Train an RBF SVR on CCA-projected features and write test predictions.

    Args:
        args: 8-tuple ``(training_file, label_file, test_file, u_file, e, c,
            output_file, components)``. ``e`` and ``c`` are the SVR epsilon
            and C values; ``components`` is the number of CCA components.

    Writes one prediction per line to ``output_file``.
    """
    (training_file, label_file, test_file, u_file, e, c, output_file,
     components) = args

    X_training = np.asarray(load_feat(training_file))
    U = np.asarray(load_feat(u_file))
    with open(label_file) as label_handle:
        y_training = np.asarray(
            [float(line.strip()) for line in label_handle])

    X_test = np.asarray(load_feat(test_file))
    X_test[np.isnan(X_test)] = 0.0
    # The original also read test labels from `test_label`, a name that is
    # not part of `args`; the labels were never used, so the read is dropped.

    # CCA needs row-paired views, so both sets are cut to the shorter one.
    s = min(len(X_training), len(U))
    cca = CCA(n_components=components, max_iter=50)
    (X_cca, U_cca) = cca.fit_transform(X_training[:s], U[:s])
    X_test_cca = cca.transform(X_test)

    svr = SVR(C=c, epsilon=e, kernel='rbf')
    svr.fit(X_cca, y_training[:s])
    pred = svr.predict(X_test_cca)

    with open(output_file, 'w') as output:
        for p in pred:
            # Equivalent to Python 2's `print >> output, p`, but also valid
            # on Python 3.
            output.write("%s\n" % p)
    return
def fbcca_realtime(data, list_freqs, fs, num_harms=3, num_fbs=5):
    """Classify one data window with filter-bank CCA and a threshold.

    Args:
        data: 2-D array whose second axis indexes samples.
        list_freqs: Candidate frequencies, one per class.
        fs: Sampling rate forwarded to ``cca_reference`` and ``filterbank``.
        num_harms: Number of harmonics requested from ``cca_reference``.
        num_fbs: Number of filter-bank sub-bands.

    Returns:
        Index of the best-scoring class, or the sentinel ``999`` when the
        weighted correlation of that class is below the threshold.
    """
    fb_coefs = np.power(np.arange(1, num_fbs + 1), (-1.25)) + 0.25
    num_targs = len(list_freqs)
    _, num_smpls = data.shape
    y_ref = cca_reference(list_freqs, fs, num_smpls, num_harms)
    cca = CCA(n_components=1)

    r = np.zeros((num_fbs, num_targs))
    for fb_i in range(num_fbs):
        testdata = filterbank(data, fs, fb_i)
        for class_i in range(num_targs):
            refdata = np.squeeze(y_ref[class_i, :, :])
            test_C, ref_C = cca.fit_transform(testdata.T, refdata.T)
            r_tmp, _ = pearsonr(np.squeeze(test_C), np.squeeze(ref_C))
            # `r_tmp == np.nan` is always False because NaN never compares
            # equal; np.isnan is the working check.
            if np.isnan(r_tmp):
                r_tmp = 0
            r[fb_i, class_i] = r_tmp

    rho = np.dot(fb_coefs, r)  # weighted sum over the filter banks
    print(rho)
    result = np.argmax(rho)

    # Reference points: np.sum(fb_coefs * 0.6) = 1.941,
    # np.sum(fb_coefs * 0.8) = 2.587, np.sum(fb_coefs * 0.9) = 2.91
    # (for the default num_fbs).
    THRESHOLD = 2.1
    if abs(rho[result]) < THRESHOLD:
        # Correlation too small: report "no command".
        return 999
    return result
def rdc_cca(indexes):
    """Return the first canonical correlation for one pair of feature blocks.

    Args:
        indexes: Triple whose first two items index ``GLOBAL_RDC_FEATURES``;
            the third item is unpacked but not used.

    Returns:
        Correlation between the first pair of canonical variates.
    """
    i, j, _ = indexes
    model = CCA(n_components=1)
    left_scores, right_scores = model.fit_transform(GLOBAL_RDC_FEATURES[i],
                                                    GLOBAL_RDC_FEATURES[j])
    corr_matrix = numpy.corrcoef(left_scores.T, right_scores.T)
    print('ij', i, j)
    return corr_matrix[0, 1]
def cca(X, Y, K):
    '''
    Fit CCA on the two views X, Y with K components.

    Returns the projected X, the projected Y and the fitted CCA model.
    '''
    model = CCA(n_components=K, scale=False, max_iter=1000)
    projections = model.fit_transform(X, Y)
    X_c, Y_c = projections
    return X_c, Y_c, model
def CCA_corrcoeff(X, Y, n_components):
    """Fit CCA and recover linear weights from centred data to the scores.

    Args:
        X: First data matrix (rows are observations).
        Y: Second data matrix, row-paired with ``X``.
        n_components: Number of canonical components. The original body
            overwrote this argument with a hard-coded 3; it is now honoured.

    Returns:
        Tuple ``(A, B, U, V)``: ``U``/``V`` are the CCA scores and ``A``/``B``
        solve the normal equations ``Xc.T Xc A = Xc.T U`` (and likewise for
        ``Y``), where ``Xc`` is the column-centred data.
    """
    cca = CCA(n_components=n_components)
    U, V = cca.fit_transform(X, Y)

    X_centered = np.subtract(X, X.mean(axis=0))
    Y_centered = np.subtract(Y, Y.mean(axis=0))

    A = np.linalg.solve(X_centered.T.dot(X_centered), X_centered.T.dot(U))
    B = np.linalg.solve(Y_centered.T.dot(Y_centered), Y_centered.T.dot(V))
    return A, B, U, V
def _extract_corr(data, reference):
    """Correlation extractor.

    Reshapes the signal and the reference into single-column matrices,
    fits a one-component CCA on them, and returns the absolute value of the
    correlation between the two resulting score vectors.
    """
    signal_column = data.reshape(data.shape[1], 1)
    reference_column = reference.reshape(reference.shape[0], 1)

    model = CCA(n_components=1)
    signal_scores, reference_scores = model.fit_transform(signal_column,
                                                          reference_column)
    corr_matrix = np.corrcoef(signal_scores.T, reference_scores.T)
    return np.abs(corr_matrix[0, 1])
def compute_corr(self, X_test, method="cca"):
    """Correlate ``X_test`` with the stored reference matrix ``self.Y``.

    Args:
        X_test: Test matrix; it is transposed before being passed to CCA.
        method: ``"eig"`` selects ``CCA.cca_eig``; any other value uses the
            sklearn implementation.

    Returns:
        The first element of ``cca_eig``'s result for ``"eig"``, otherwise
        the Pearson correlation of the first canonical score columns.

    Raises:
        ValueError: If ``self.Y`` is ``None``.
    """
    if self.Y is None:
        raise ValueError(
            "Reference matrix Y must be computed using `fit` before computing corr"
        )

    if method != "eig":
        # use sklearn implementation
        model = CCA_sklearn(n_components=1)
        x_scores, y_scores = model.fit_transform(X_test.T, self.Y.T)
        return pearsonr(x_scores[:, 0], y_scores[:, 0])[0]

    return CCA.cca_eig(X_test.T, self.Y.T)[0]
def rdc(x, y, k=20, s=1 / 6., f=np.sin):
    """
    Compute the randomized dependence coefficient

    The coefficient responds to linear and non-linear dependence between
    the data vectors x and y. Based on the paper "The Randomized Dependence
    Coefficient", https://arxiv.org/abs/1304.7717.

    Parameters
    ----------
    x : 1D numpy array with shape (N,)
        data coordinates
    y : 1D numpy array with shape (N,)
        data coordinates
    k,s : tuning parameters (number of random projections and their scale)
        do not alter unless you really know what you're doing
    f : non-linear basis function

    Returns
    -------
    randomized dependence coefficient
    """
    import scipy.stats as stat
    from sklearn.cross_decomposition import CCA

    def _with_ones(values):
        # Append a constant column of ones on the right.
        return np.hstack([values, np.ones((values.shape[0], 1))])

    # Rank transform, then add the bias column.
    x_ranked = _with_ones((stat.rankdata(x) / x.size)[:, np.newaxis])
    y_ranked = _with_ones((stat.rankdata(y) / y.size)[:, np.newaxis])

    # Random Gaussian projections; x is drawn before y.
    x_cols = x_ranked.shape[1]
    x_weights = np.random.normal(size=(x_cols, k))
    x_proj = np.dot(s / x_cols * x_ranked, x_weights)

    y_cols = y_ranked.shape[1]
    y_weights = np.random.normal(size=(y_cols, k))
    y_proj = np.dot(s / y_cols * y_ranked, y_weights)

    # Non-linear map plus bias column, then the first canonical correlation.
    x_feat = _with_ones(f(x_proj))
    y_feat = _with_ones(f(y_proj))

    model = CCA(n_components=1)
    x_c, y_c = model.fit_transform(x_feat, y_feat)
    return np.corrcoef(x_c.T, y_c.T)[0, 1]
def rdc(X, Y, k=None, s=1. / 6., f=numpy.sin, rand_gen=None, rnorm_X=None, rnorm_Y=None):
    """Randomized dependence coefficient between ``X`` and ``Y``.

    Args:
        X, Y: 1-D or 2-D arrays with the same number of rows.
        k: Number of random projections; defaults to the larger column
            count plus one.
        s: Scale of the random projections.
        f: Non-linear function applied to the projections.
        rand_gen: Random generator; defaults to
            ``numpy.random.RandomState(RAND_STATE)``.
        rnorm_X, rnorm_Y: Optional pre-drawn projection matrices.

    Returns:
        Correlation between the first pair of canonical variates.
    """
    X = X[:, numpy.newaxis] if X.ndim == 1 else X
    Y = Y[:, numpy.newaxis] if Y.ndim == 1 else Y

    # heuristic assumption
    if k is None:
        k = max(X.shape[1], Y.shape[1]) + 1

    n_instances = X.shape[0]
    assert Y.shape[0] == n_instances, (Y.shape[0], n_instances)

    if rand_gen is None:
        rand_gen = numpy.random.RandomState(RAND_STATE)

    bias = numpy.ones((n_instances, 1))

    # empirical copula transformation
    X_cop = numpy.hstack((numpy.apply_along_axis(ecdf, 0, X), bias))
    Y_cop = numpy.hstack((numpy.apply_along_axis(ecdf, 0, Y), bias))

    # linear projection through a random gaussian (X is drawn before Y)
    if rnorm_X is None:
        rnorm_X = rand_gen.normal(size=(X_cop.shape[1], k))
    if rnorm_Y is None:
        rnorm_Y = rand_gen.normal(size=(Y_cop.shape[1], k))
    X_lin = s / X_cop.shape[1] * numpy.dot(X_cop, rnorm_X)
    Y_lin = s / Y_cop.shape[1] * numpy.dot(Y_cop, rnorm_Y)

    # non-linear projection
    X_feat = numpy.hstack((f(X_lin), bias))
    Y_feat = numpy.hstack((f(Y_lin), bias))

    # canonical correlation analysis
    model = CCA(n_components=1)
    X_cca, Y_cca = model.fit_transform(X_feat, Y_feat)
    return numpy.corrcoef(X_cca.T, Y_cca.T)[0, 1]
def cca_correlation(X, Y, n_comp=50):
    """Per-component canonical correlations between ``X`` and ``Y``.

    :param X, Y: should be N-by-p, N-by-q matrices,
    :param n_comp: an integer, how many components to create and compare.
    :return: 1-D array holding, for each component, the correlation
        between the matching columns of the projected X and projected Y
        (both N-by-n_comp).
    """
    model = CCA(n_components=n_comp)
    x_scores, y_scores = model.fit_transform(X, Y)
    joint_corr = np.corrcoef(x_scores, y_scores, rowvar=False)
    # Lower-left block = correlations between Y components and X components.
    cross_block = joint_corr[n_comp:, :n_comp]
    return np.diag(cross_block)
def cca_subspace(X, Y, n_comp=50, **kwargs):
    """Per-component canonical correlations plus the fitted model.

    :param X, Y: should be N-by-p, N-by-q matrices. N is the dimension of
        the whole space and p, q are the numbers of basis vectors: p and q
        act as the features being recombined, N as the number of samples.
    :param n_comp: an integer, how many components to create and compare.
    :param kwargs: extra keyword arguments forwarded to ``CCA``.
    :return: ``(cca_corr, cca)`` where ``cca_corr`` is a 1-D array with the
        correlation between matching columns of the projected X and Y, and
        ``cca`` is the fitted model.
    """
    model = CCA(n_components=n_comp, **kwargs)
    x_scores, y_scores = model.fit_transform(X, Y)
    joint_corr = np.corrcoef(x_scores, y_scores, rowvar=False)
    # Lower-left block = cross-correlation part of the joint matrix.
    component_corr = np.diag(joint_corr[n_comp:, :n_comp])
    return component_corr, model
def getCoeff(id, sample, framePeriod, currentTimeMillis):
    """Update the sliding windows for ``id`` and return their CCA correlation.

    The oldest entry of the module-level ``X[id]`` and ``Y[id]`` lists is
    dropped, the shifted sample and a freshly computed list of ``N``
    harmonic terms are appended, and a one-component CCA is fitted on the
    two windows.

    Returns:
        Correlation between the first pair of canonical variates.
    """
    T = framePeriod[0] / 60
    tau = framePeriod[1] / 60
    t = currentTimeMillis / 1000

    shifted = [value - tau / T for value in sample.copy()]
    harmonics = [
        (2 / (n * pi)) * sin(pi * n * tau / T) * cos(2 * pi * n * (t - tau / 2) / T)
        for n in range(1, N + 1)
    ]

    # Slide both windows forward by one entry.
    del X[id][0]
    del Y[id][0]
    X[id].append(list(shifted))
    Y[id].append(list(harmonics))

    model = CCA(n_components=1)
    X_c, Y_c = model.fit_transform(X[id], Y[id])
    return np.corrcoef(X_c.T, Y_c.T)[0, 1]
def canonical_correlation_analysis(list_a, list_b, list_y):
    """First canonical correlation between ``(list_a, list_b)`` and ``list_y``.

    Returns ``None`` when the three lists differ in length. Otherwise the
    2x2 correlation matrix of the canonical variates is printed and its
    off-diagonal entry is returned.
    """
    n_items = len(list_a)
    if not (n_items == len(list_b) and len(list_b) == len(list_y)):
        return None

    # Each row of X pairs the i-th entries of list_a and list_b.
    X = [[list_a[idx], list_b[idx]] for idx in range(n_items)]
    Y = [list_y[idx] for idx in range(n_items)]

    model = CCA(n_components=1)
    X_c, Y_c = model.fit_transform(X, Y)
    result = np.corrcoef(X_c.T, Y_c.T)[0, 1]
    print(np.corrcoef(X_c.T, Y_c.T))
    return result
def fbcca_realtime(eeg, list_freqs, fs, num_harms=3, num_fbs=5):
    """Classify a batch of events with filter-bank CCA and a vote.

    Every event is classified on its own; the most frequent class across
    events is returned when the mean winning correlation is high enough.

    Args:
        eeg: 3-D array; the first axis indexes events and the last axis
            indexes samples.
        list_freqs: Candidate frequencies, one per class.
        fs: Sampling rate forwarded to ``cca_reference`` and ``filterbank``.
        num_harms: Number of harmonics requested from ``cca_reference``.
        num_fbs: Number of filter-bank sub-bands.

    Returns:
        The most recurrent class index, or ``None`` when there are no
        events or the average correlation is below the threshold.
    """
    print("EEG shape: ", eeg.shape)
    fb_coefs = np.power(np.arange(1, num_fbs + 1), (-1.25)) + 0.25
    num_targs = len(list_freqs)
    events, _, num_smpls = eeg.shape
    y_ref = cca_reference(list_freqs, fs, num_smpls, num_harms)
    cca = CCA(n_components=1)

    r = np.zeros((num_fbs, num_targs))
    r_tmp_mode = []
    r_tmp_corr_avg = []
    for event in range(events):
        test_tmp = np.squeeze(eeg[event, :, :])
        for fb_i in range(num_fbs):
            # Filtering does not depend on the class, so it is done once
            # per sub-band instead of once per class.
            testdata = filterbank(test_tmp, fs, fb_i)
            for class_i in range(num_targs):
                refdata = np.squeeze(y_ref[class_i, :, :])
                # Rows must be observations, hence the transposes.
                test_C, ref_C = cca.fit_transform(testdata.T, refdata.T)
                r_tmp, _ = pearsonr(np.squeeze(test_C), np.squeeze(ref_C))
                # `r_tmp == np.nan` is always False; np.isnan is the
                # working check.
                if np.isnan(r_tmp):
                    r_tmp = 0
                r[fb_i, class_i] = r_tmp
        rho = np.dot(fb_coefs, r)  # weighted sum over the filter banks
        print("rho: ", rho)
        result = np.argmax(rho)
        print("result: ", result)
        r_tmp_mode.append(result)
        print("correlation: ", abs(rho[result]))
        r_tmp_corr_avg.append(abs(rho[result]))

    if not r_tmp_mode:
        # No events, so there is nothing to vote on.
        return None

    # Smallest most-frequent class index. Replaces `mode(...)[0][0]`,
    # whose double indexing fails on SciPy versions that return scalars.
    r_mode = np.bincount(r_tmp_mode).argmax()
    r_corr_avg = np.mean(r_tmp_corr_avg)
    print("====Most recurrent class: ====", r_mode)
    print("====Average correlation: =====", r_corr_avg)

    THRESHOLD = 0.3
    if r_corr_avg >= THRESHOLD:
        return r_mode
    # If the correlation isn't big enough, do not return any command.
    return None
def load_word_bank_dataset():
    """Load World Bank life-expectancy / fertility data and apply CCA.

    The data is read from two local CSV files when both exist, otherwise it
    is downloaded with ``wbdata``. Rows with missing values are dropped and
    only countries present in both tables are kept.

    Returns:
        Tuple ``(life_exp_cca, fert_rate_cca)`` of 2-component canonical
        scores, one row per kept country row.
    """
    fert_dataset_path = './demo/WorldBankData/fertility_rate.csv'
    life_exp_dataset_path = './demo/WorldBankData/life_expectancy.csv'
    years_str_list = [str(year) for year in range(1960, 2017)]

    # `and` short-circuits; the original used bitwise `&` on the booleans.
    if os.path.exists(fert_dataset_path) and os.path.exists(
            life_exp_dataset_path):
        # Load and drop rows with missing values
        fert_rate = pd.read_csv(fert_dataset_path).dropna()
        life_exp = pd.read_csv(life_exp_dataset_path).dropna()
        country_field_name = 'Country Code'
    else:
        # Files don't exist: download the data with wbdata instead
        life_exp = wbdata.get_dataframe(indicators={
            "SP.DYN.LE00.IN": 'value'
        }).unstack(level=0).transpose().reset_index()
        fert_rate = wbdata.get_dataframe(indicators={
            "SP.DYN.TFRT.IN": 'value'
        }).unstack(level=0).transpose().reset_index()

        # Keep only country name and years columns, filter rows with N/A's
        life_exp = life_exp[['country'] + years_str_list].dropna()
        fert_rate = fert_rate[['country'] + years_str_list].dropna()
        country_field_name = 'country'

    # Keep only countries which appear in both dataframes
    valid_countries = list(
        set(life_exp[country_field_name]) & set(fert_rate[country_field_name]))
    life_exp = life_exp[life_exp[country_field_name].isin(valid_countries)]
    fert_rate = fert_rate[fert_rate[country_field_name].isin(valid_countries)]
    # NOTE(review): CCA pairs rows by position; this relies on both tables
    # listing the countries in the same order - confirm for the data source.

    # Convert to numpy
    life_exp = life_exp[years_str_list].to_numpy()
    fert_rate = fert_rate[years_str_list].to_numpy()

    # Apply CCA. fit_transform returns the scores in argument order, so the
    # first output belongs to fert_rate. The original unpacked them into
    # swapped names and returned mislabelled arrays.
    cca_transformer = CCA(n_components=2)
    fert_rate_cca, life_exp_cca = cca_transformer.fit_transform(
        fert_rate, life_exp)
    return life_exp_cca, fert_rate_cca
def CanonCoff(self, X):
    """Return, per candidate frequency, the best canonical correlation.

    For every entry of ``self.cca_frequency`` a reference made of the sine
    and cosine at the frequency and at twice the frequency (evaluated on
    ``self.t``) is built, a 4-component CCA is fitted against ``X``, and
    the largest of the four matching-component correlations is kept.

    Returns:
        1-D array with one value per frequency.
    """
    references = []
    for freq in self.cca_frequency:
        phase = 2 * np.pi * self.t * freq
        references.append(
            [np.sin(phase), np.cos(phase), np.sin(2 * phase), np.cos(2 * phase)])

    print(len(X))
    model = CCA(n_components=4)
    best = np.zeros(len(references))
    for idx, reference in enumerate(references):
        # Transpose so that rows line up with the rows of X.
        X_c, Y_c = model.fit_transform(X, np.array(reference).T)
        corr = np.corrcoef(X_c.T, Y_c.T)
        # Entry (k, 4 + k) pairs the k-th X variate with the k-th Y variate.
        matched = np.array([corr[0 + k, 4 + k] for k in range(4)])
        best[idx] = np.max(matched)
    return best
def cca(self, X1, X2, n_components=2):
    """Fit CCA on ``X1``/``X2`` and return the ``X1`` scores as a DataFrame.

    Args:
        X1: First data matrix.
        X2: Second data matrix, row-paired with ``X1``.
        n_components: Number of canonical components.

    Returns:
        ``pandas.DataFrame`` built from the canonical scores of ``X1``.
    """
    model = CCA(n_components=n_components)
    first_scores, _ = model.fit_transform(X1, X2)
    return pd.DataFrame(first_scores)
class CCAAnalysis:
    """Canonical Correlation Analysis for SSVEP paradigm"""

    def __init__(self, freqs, win_len, s_rate, n_harmonics=1):
        """
        Args:
            freqs (list): List of target frequencies
            win_len (float): Window length
            s_rate (int): Sampling rate of EEG signal
            n_harmonics (int): Number of harmonics to be considered
        """
        self.freqs = freqs
        self.win_len = win_len
        self.s_rate = s_rate
        self.n_harmonics = n_harmonics
        self.train_data = self._init_train_data()
        self.cca = CCA(n_components=1)

    def _init_train_data(self):
        """Build the sine/cosine reference matrix for every target frequency.

        Returns:
            dict mapping each frequency to an array of shape
            ``[n_samples, 2 * n_harmonics]``: the sine columns for harmonics
            1..n_harmonics followed by the matching cosine columns.
        """
        # Sample instants k / s_rate. linspace with its default endpoint
        # spaced the samples win_len / (n - 1) apart, which detunes the
        # references slightly.
        n_samples = int(self.s_rate * self.win_len)
        t_vec = np.arange(n_samples) / float(self.s_rate)

        targets = {}
        for freq in self.freqs:
            # Harmonics start at 1 (the fundamental). Starting at 0 gave an
            # all-zero sine and a constant cosine, which carry no signal.
            multiples = [h * freq for h in range(1, self.n_harmonics + 1)]
            sig_sin = [np.sin(2 * np.pi * f_h * t_vec) for f_h in multiples]
            sig_cos = [np.cos(2 * np.pi * f_h * t_vec) for f_h in multiples]
            targets[freq] = np.array(sig_sin + sig_cos).T
        return targets

    def apply_cca(self, eeg):
        """Apply CCA analysis to EEG data and return scores for each target frequency

        Args:
            eeg (np.array): EEG array [n_samples, n_chan]

        Returns:
            list of scores for target frequencies
        """
        scores = []
        for reference in self.train_data.values():
            sig_c, t_c = self.cca.fit_transform(eeg, reference)
            scores.append(np.corrcoef(sig_c.T, t_c.T)[0, 1])
        return scores
def transform_cca(self, abs_cols=None, cols=None, clusters=None, kwargs=None):
    """Run CCA on selected data and append the result columns to ``self.df``.

    Args:
        abs_cols: Optional column selection applied after ``select_data``.
        cols: Forwarded to ``self.select_data``.
        clusters: Forwarded to ``self.select_data``.
        kwargs: Passed through ``self.process_kwargs('cca', ...)`` and then
            used as the ``CCA`` constructor arguments.

    The output columns are named ``cca_0``, ``cca_1``, ...; the names are
    also stored in ``self.pca_names``.
    """
    selected = self.select_data(cols=cols, clusters=clusters)
    if abs_cols is not None:
        selected = selected[abs_cols]

    model_kwargs = self.process_kwargs('cca', kwargs)
    model = CCA(**model_kwargs)
    # NOTE(review): only one matrix is passed to fit_transform here -
    # confirm that the CCA class in scope accepts a single input.
    transformed = model.fit_transform(selected)

    _, n_out = transformed.shape
    column_names = ["cca_{}".format(idx) for idx in range(n_out)]
    self.pca_names = column_names
    self.df = pd.concat(
        [self.df, pd.DataFrame(data=transformed, columns=column_names)],
        axis=1)
def main(args):
    """Train an SVC on CCA-projected features and report test accuracy.

    Args:
        args: 5-tuple ``(training_file, label_file, test_file, test_label,
            u_file)``.

    Prints the predictions, the true test labels and the accuracy, and
    writes one prediction per line to ``test_file + '.cca.2.pred'``.
    """
    (training_file, label_file, test_file, test_label, u_file) = args

    X_training = np.asarray(load_feat(training_file))
    n = len(X_training)
    U = np.asarray(load_feat(u_file))
    with open(label_file) as label_handle:
        y_training = np.asarray(
            [int(line.strip()) for line in label_handle])

    X_test = np.asarray(load_feat(test_file))
    with open(test_label) as label_handle:
        y_test = np.asarray([int(line.strip()) for line in label_handle])

    cca = CCA(n_components=100)
    (X_cca, U_cca) = cca.fit_transform(X_training, U[:n])
    # The classifier is trained on the X canonical scores, so the test data
    # needs the same projection. `predict` estimates the second view and is
    # not the same feature space.
    X_test_cca = cca.transform(X_test)

    clf = SVC()
    clf.fit(X_cca, y_training)
    pred = clf.predict(X_test_cca)

    print(pred)
    print(y_test)  # the original printed the undefined name `test_y`
    print(accuracy_score(y_test, pred))

    with open(test_file + '.cca.2.pred', 'w') as output:
        for p in pred:
            output.write("%s\n" % p)
    return
def fbcca_feature(self, eeg, parameter_list, num_harms=3, num_fbs=10):
    """Compute the filter-bank CCA score of every target for one EEG segment.

    Args:
        eeg: EEG data passed to ``self.filter_bank``.
        parameter_list: Sequence where item 1 is the target list and
            item 2 divided by item 3 gives the sampling rate; item 2 is
            also forwarded to ``self.cca_reference``.
        num_harms: Number of harmonics for the reference signals.
        num_fbs: Number of filter-bank sub-bands.

    Returns:
        1-D array with the weighted correlation of each target.
    """
    targets = parameter_list[1]
    fs = parameter_list[2] / parameter_list[3]
    band_weights = np.power(np.arange(1, num_fbs + 1), (-1.25)) + 0.25
    num_targs = len(targets)
    y_ref = self.cca_reference(targets, fs, parameter_list[2], num_harms)
    model = CCA(n_components=1)

    corr = np.zeros((num_fbs, num_targs))
    for band in range(num_fbs):
        filtered = self.filter_bank(eeg, fs, band)
        for target in range(num_targs):
            reference = np.squeeze(y_ref[target, :, :])
            # Rows must be observations, so both inputs are transposed.
            eeg_scores, ref_scores = model.fit_transform(filtered.T,
                                                         reference.T)
            corr[band, target] = pearsonr(np.squeeze(eeg_scores),
                                          np.squeeze(ref_scores))[0]

    # Weighted sum over the sub-bands.
    results = np.dot(band_weights, corr)
    print("fb_cca:", results)
    return results
def main(args):
    """Train an SVC on CCA-projected features and report test accuracy.

    Args:
        args: 5-tuple ``(training_file, label_file, test_file, test_label,
            u_file)``.

    Prints the predictions, the true test labels and the accuracy, and
    writes one prediction per line to ``test_file + '.cca.2.pred'``.
    """
    (training_file, label_file, test_file, test_label, u_file) = args

    X_training = np.asarray(load_feat(training_file))
    n = len(X_training)
    U = np.asarray(load_feat(u_file))
    with open(label_file) as label_handle:
        y_training = np.asarray(
            [int(line.strip()) for line in label_handle])

    X_test = np.asarray(load_feat(test_file))
    with open(test_label) as label_handle:
        y_test = np.asarray([int(line.strip()) for line in label_handle])

    cca = CCA(n_components=100)
    (X_cca, U_cca) = cca.fit_transform(X_training, U[:n])
    # The classifier is trained on the X canonical scores, so the test data
    # needs the same projection. `predict` estimates the second view and is
    # not the same feature space.
    X_test_cca = cca.transform(X_test)

    clf = SVC()
    clf.fit(X_cca, y_training)
    pred = clf.predict(X_test_cca)

    print(pred)
    print(y_test)  # the original printed the undefined name `test_y`
    print(accuracy_score(y_test, pred))

    with open(test_file + '.cca.2.pred', 'w') as output:
        for p in pred:
            output.write("%s\n" % p)
    return
def qvec_cca(**kwargs):
    """Correlate embeddings with features through one-component CCA.

    Embeddings and features are restricted to their shared columns, a
    one-component CCA is fitted, and the Pearson correlation of the two
    score vectors is written to
    ``results/<level>/qvec/<lg>/<name>/<hyperparams>/<epoch>.txt``.

    Keyword Args:
        lg, level, name, size, epoch: Required.
        hidden / window: ``hidden`` is used for the hyper-parameter tag when
            present, otherwise ``window``.
        Everything is also forwarded to ``load_embeddings``.

    Returns:
        Tuple ``(r, p)`` from ``pearsonr``.
    """
    embeddings = load_embeddings(**kwargs).T
    features = load_features(kwargs["lg"]).T

    shared = embeddings.columns.intersection(features.columns)
    feature_view = features[shared]
    embedding_view = embeddings[shared]

    model = CCA(n_components=1)
    emb_scores, feat_scores = model.fit_transform(embedding_view.T,
                                                  feature_view.T)
    r, p = pearsonr(emb_scores.reshape(-1), feat_scores.reshape(-1))

    # Write results to disk
    level, lg, name = kwargs["level"], kwargs["lg"], kwargs["name"]
    hyperparams = (f"{kwargs['size']}-{kwargs['hidden']}"
                   if "hidden" in kwargs else
                   f"{kwargs['size']}-{kwargs['window']}")
    path = f"results/{level}/qvec/{lg}/{name}/{hyperparams}"
    ensure_dir(path)
    epoch = kwargs["epoch"]
    filename = os.path.join(path, f"{epoch}.txt")
    with open(filename, "w") as handle:
        handle.write(str((r, p)))
    return r, p
def ccr_median(U, V, n_components=5):
    """Median absolute canonical correlation between ``U`` and ``V``.

    Args:
        U: First data matrix.
        V: Second data matrix, row-paired with ``U``.
        n_components: Number of canonical components. Defaults to 5, the
            value that used to be hard-coded.

    Returns:
        Median of the absolute correlations between matching components.
    """
    cca = CCA(n_components=n_components)
    U_c, V_c = cca.fit_transform(U, V)
    # The diagonal at offset n_components of the joint correlation matrix
    # pairs the i-th U variate with the i-th V variate.
    joint_corr = np.corrcoef(U_c.T, V_c.T)
    coef = np.abs(joint_corr.diagonal(offset=n_components))
    return np.median(coef)
def getVLADDescriptors(path, pathVD, pathCNNGT, pathColf):
    """Build one CCA-fused VLAD descriptor per ``.jpg`` image in ``path``.

    For every image a VLAD vector over its SIFT descriptors and a VLAD
    vector over its stored column features are computed, both are projected
    with a one-component CCA, and the two projections are concatenated.

    Args:
        path: Directory that is searched for ``*.jpg`` files.
        pathVD: Pickle file with the visual dictionary for SIFT descriptors.
        pathCNNGT: Pickle file whose item 0 is the list of image file names
            and item 1 the matching scene features.
        pathColf: Pickle file with the visual dictionary for column features.

    Returns:
        Tuple ``(descriptors, idImage)``: an array of fused descriptors and
        the list of image paths they belong to. Images for which SIFT
        yields no descriptors are skipped.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(pathVD, 'rb') as f:
        visualDictionary = pickle.load(f)

    # load cnn features
    with open(pathCNNGT, 'rb') as f:
        pkl = pickle.load(f)
    scenefs = pkl[1]
    img_names = pkl[0]

    # VLAD on column feature
    with open(pathColf, 'rb') as f:
        vd_colf = pickle.load(f)

    descriptors = list()
    idImage = list()
    for imagePath in glob.glob(path + "/*.jpg"):
        print(imagePath)
        img = cv2.imread(imagePath)

        # Look the file name up once and reuse the position.
        name_idx = img_names.index(imagePath.split("/")[-1])
        print(name_idx)
        scenef = scenefs[name_idx]
        print("scenef.shape = ")
        print(scenef.shape)

        # Flatten the first two axes of the first entry into a list of
        # per-position feature vectors.
        scenef = scenef[0]
        rows = scenef.shape[0]
        columns = scenef.shape[1]
        colf = np.asarray(
            [scenef[i, j] for i in range(rows) for j in range(columns)])
        print(colf.shape)

        sift = cv2.xfeatures2d.SIFT_create()
        kp, des = sift.detectAndCompute(img, None)

        # `np.any(des) != None` was always True, so an image without SIFT
        # descriptors (des is None) reached VLAD and crashed.
        if des is not None:
            v = VLAD(des, visualDictionary)
            vlad_colf = VLAD(colf, vd_colf)

            print("==========Performing CCA==========")
            cca = CCA(n_components=1)
            v_c, vlad_colf_c = cca.fit_transform(v, vlad_colf)
            print(v_c.shape)
            print(vlad_colf_c.shape)
            mergedf = np.concatenate([v_c, vlad_colf_c])
            mergedf = mergedf.reshape(1, -1)[0]
            print("mergedf.shape = ")
            print(mergedf.shape)
            print("==================================")

            descriptors.append(mergedf)
            idImage.append(imagePath)

    descriptors = np.asarray(descriptors)
    print(descriptors.shape)
    return descriptors, idImage
# Script fragment: prepares the output directory and logger, loads a data set
# described by a config file, fits CCA on the training pair, and logs trace
# correlation results for the test and training sets.
if not os.path.isdir(dir_name):
    os.makedirs(dir_name)

OutputLog().set_path(dir_name)
OutputLog().set_verbosity(configuration.output_parameters['verbosity'])

# Read the "dataset_parameters" section of the data set config file.
data_config = ConfigParser.ConfigParser()
data_config.read(data_set_config)
data_parameters = ConfigSectionMap("dataset_parameters", data_config)

# construct data set
data_set = Container().create(data_parameters['name'], data_parameters)

# Fit CCA on the training pair and project both the training and test pairs.
# NOTE(review): the transformed arrays are not used below in this fragment;
# the testers receive the untransformed data with an IdentityTransformer -
# confirm this is intended.
cca_model = CCA(n_components=top, scale=True, copy=False)
train_transformed_x, train_transformed_y = cca_model.fit_transform(data_set.trainset[0], data_set.trainset[1])
test_transformed_x, test_transformed_y = cca_model.transform(data_set.testset[0], data_set.testset[1])

OutputLog().write('test results:')
correlations, trace_correlation, var, x_test, y_test, test_best_layer = TraceCorrelationTester(
    data_set.testset[0], data_set.testset[1], top).test(IdentityTransformer(), configuration.hyper_parameters)

OutputLog().write('train results:')
correlations, train_trace_correlation, var, x_train, y_train, train_best_layer = TraceCorrelationTester(
    data_set.trainset[0], data_set.trainset[1], top).test(IdentityTransformer(), configuration.hyper_parameters)

OutputLog().write('\nTest results : \n')
configuration.hyper_parameters.print_parameters(OutputLog())
# Script fragment (Python 2 syntax): removes outliers, fits CCA between X and
# the selected class columns of y, pickles the model, saves plots, and stores
# the transformed X values.
X, good_idx = remove_outliers(X, 6.0)
# Keep only the label rows that survived outlier removal.
# NOTE(review): `.ix` is not available in recent pandas releases - confirm
# the pandas version this script targets.
y = y.ix[y.index[good_idx]]

# sanity check
# idx = np.random.permutation(len(y))[0]
# idx = np.where(y.index == 119384)[0][0]
# image_sanity_check(y.index[idx], X[idx])

# only keep unique values
unique_cols = ['Class1.1', 'Class1.2', 'Class2.1', 'Class3.1', 'Class4.1', 'Class5.1', 'Class5.2', 'Class5.3',
               'Class6.1', 'Class7.1', 'Class7.2', 'Class8.1', 'Class8.2', 'Class8.3', 'Class8.4', 'Class8.5',
               'Class8.6', 'Class9.1', 'Class9.2', 'Class10.1', 'Class10.2', 'Class11.1', 'Class11.2',
               'Class11.3', 'Class11.4', 'Class11.5']

# do CCA
if verbose:
    print 'Doing CCA...'
# One component per kept class column; copy=False is passed to CCA.
cca = CCA(n_components=len(unique_cols), copy=False)
X_cca, y_cca = cca.fit_transform(X, y[unique_cols].values.astype(np.float32))

# Persist the fitted model.
cPickle.dump(cca, open(base_dir + 'data/CCA_DCT.pickle', 'wb'))

# make plots
make_cca_images(cca, (100, 100), dct_idx=dct_idx)
fig = plot_cca_projections(X_cca)
fig.savefig(plot_dir + 'CCA_dist_no_outliers.png')
if doshow:
    plt.show()

print 'Saving the transformed values...'
np.save(base_dir + 'data/CCA_training_transform', X_cca)