def do_cca(X, y, X_orig, n_components=10, permutations=10):
    '''
    Performs a CCA using components
    Projects scores back to edge space
    '''
    cca = CCA(n_components=n_components)
    cca.fit(X, y)

    # save the latent component correlation
    cca.mode_r = []
    for component in range(n_components):
        cca.mode_r.append(
            np.corrcoef(cca.x_scores_[:, component],
                        cca.y_scores_[:, component])[0, 1])

    # correlate behaviour with LC score
    cca.y_score_correlation = np.zeros((np.shape(y)[1], n_components))
    for component in range(n_components):
        for beh in range(np.shape(y)[1]):
            cca.y_score_correlation[beh, component] = np.corrcoef(
                y[:, beh].T, cca.y_scores_[:, component])[0, 1]

    # correlate edges with LC score
    cca.x_score_correlation = np.zeros((np.shape(X_orig)[1], n_components))
    for component in range(n_components):
        cca.x_score_correlation[:, component] = np.corrcoef(
            cca.x_scores_[:, component], X_orig.T)[1::, 0]

    # non-parametric max-T test for component significance
    max_r = []
    for perm in tqdm(range(permutations)):
        # shuffle the behaviour for each permutation
        y_shuffle = shuffle(y)

        # perform a new CCA with the shuffled data
        cca_perm = CCA(n_components=n_components)
        cca_perm.fit(X, y_shuffle)

        # save the latent component correlation
        mode_r_perm = []
        for component in range(n_components):
            mode_r_perm.append(
                np.corrcoef(cca_perm.x_scores_[:, component],
                            cca_perm.y_scores_[:, component])[0, 1])

        # take the max r value
        max_r.append(np.max(mode_r_perm))

    # Compute adjusted p-values via the percentile of the max-r null distribution
    max_r = np.asarray(max_r)  # needed so the elementwise comparison below works
    p_adj = []
    for component in range(n_components):
        p_adj.append(np.mean(max_r >= cca.mode_r[component]))

    return cca, p_adj
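
# Hedged usage sketch (illustrative, not part of the original source): calls
# do_cca on random data of plausible shapes. Assumes numpy (np), CCA, tqdm and
# sklearn.utils.shuffle are imported at module level as in the original file;
# the sizes, seed and variable names below are made up for this example only.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n_subjects, n_beh, n_edges = 50, 5, 100
    X_demo = rng.standard_normal((n_subjects, 20))           # e.g. component scores
    y_demo = rng.standard_normal((n_subjects, n_beh))        # behavioural measures
    edges_demo = rng.standard_normal((n_subjects, n_edges))  # original edge matrix
    cca_fit, p_adj = do_cca(X_demo, y_demo, edges_demo,
                            n_components=3, permutations=20)
    print(cca_fit.mode_r, p_adj)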
def run(data, template, reference):
    n_components = 1
    corrs = []
    cca = CCA(n_components)
    for target in range(0, len(st.frequencies)):
        results = []
        X = data
        cor, _, xweights, yweights = su.find_correlation_for_one_pair(
            cca, 1, data, template[target, :, :])
        cor_ref, _, xweights_ref, yweights_ref = su.find_correlation_for_one_pair(
            cca, 1, data, reference[target, :, :])
        cor_ref_tem, _, xweights_ref_tem, yweights_ref_tem = su.find_correlation_for_one_pair(
            cca, 1, np.squeeze(template[target, :, :]), reference[target, :, :])
        corr_t = get_cor_template(X, template[target, :, :], xweights_ref,
                                  xweights_ref)
        corr_ref_temp = get_cor_template(X, template[target, :, :],
                                         xweights_ref_tem, xweights_ref_tem)
        corr_temp = get_cor_template(np.squeeze(template[target, :, :]),
                                     template[target, :, :], xweights, yweights)
        results = [cor, cor_ref, corr_t, corr_ref_temp, corr_temp]
        corsum = map(lambda x: np.sign(x) * x**2, results)
        corsum = list(corsum)
        corsum = np.sum(corsum)
        corrs.append(np.sum(corsum))
    pre = np.argmax(corrs)
    return pre
def fit(self, model):
    """Fits the model and creates a (random) orthogonal transformation.

    Args:
        model: string; a value in ["logistic", "svm", "linear", "svr", "cca"]
    """
    if model == 'linear':
        self.mod = LinearRegression()
    elif model == 'logistic':
        self.mod = LogisticRegression(penalty='none',
                                      class_weight='balanced',
                                      solver='saga')
        #self.mod = LogisticRegression()
    elif model == 'svr':
        self.mod = SVR(kernel='linear')
    elif model == 'svm':
        self.mod = SVC(C=1.0, kernel='linear')
    elif model == 'cca':
        self.mod = CCA(n_components=1,
                       scale=True,
                       max_iter=500,
                       tol=1e-06,
                       copy=True)
        self.mod.intercept_ = 0.0
    self.mod.fit(self.Xrel, self.Yrel)

    # now compute T with a random orthogonal basis
    # todo potential bug: what to do with the intercept_?
    w0 = self.mod.coef_  # + self.mod.intercept_
    if len(w0.shape) < 2:
        w0 = w0.reshape(1, -1)
    w0 = w0 / np.linalg.norm(w0)
    Wcompl = null_space(w0)
    self.T = np.hstack((w0.transpose(), Wcompl))
    assert np.allclose(self.T.transpose().dot(self.T),
                       np.eye(self.T.shape[0])), "self.T not orthonormal."
def compCorrCoefs(self, learningSet, EEGSignals):
    n_components = 1
    cca = CCA(n_components)
    #print(EEGSignals.shape)
    '''
    correlation14 = abs(np.corrcoef(np.mean(learningSet[0:3].T, axis=1),np.mean(EEGSignals.T, axis=1))[0, 1])
    correlation28 = abs(np.corrcoef(np.mean(learningSet[3:6].T, axis=1),np.mean(EEGSignals.T, axis=1))[0, 1])
    correlation8 = abs(np.corrcoef(np.mean(learningSet[6:9].T, axis=1),np.mean(EEGSignals.T, axis=1))[0, 1])

    print(learningSet[0][0],learningSet[1][0],learningSet[2][0])
    for i in range(0,9,3):
        print(abs(np.corrcoef(learningSet[i].T,EEGSignals[int(i/3)].T)[0, 1]),
              abs(np.corrcoef(learningSet[i+1].T,EEGSignals[int(i/3)].T)[0, 1]),
              abs(np.corrcoef(learningSet[i+2].T,EEGSignals[int(i/3)].T)[0, 1]))
    print("---")
    '''
    cca.fit(learningSet[0:3].T, EEGSignals.T)
    U, V = cca.transform(learningSet[0:3].T, EEGSignals.T)
    correlation14 = abs(np.corrcoef(U.T, V.T)[0, 1])

    cca.fit(learningSet[3:6].T, EEGSignals.T)
    U, V = cca.transform(learningSet[3:6].T, EEGSignals.T)
    correlation28 = abs(np.corrcoef(U.T, V.T)[0, 1])

    cca.fit(learningSet[6:9].T, EEGSignals.T)
    U, V = cca.transform(learningSet[6:9].T, EEGSignals.T)
    correlation8 = abs(np.corrcoef(U.T, V.T)[0, 1])

    return correlation14, correlation28, correlation8
def train_eval(self, train_index, test_index, ignore_eval=False):
    normalized_train, normalized_test = normalize_by_train(
        self.source[train_index], self.source[test_index])
    if self.comp is not None:
        if self.use_scikit is not None:
            if self.use_scikit == 'cca':
                dim_reduction = CCA(n_components=self.comp)
            else:
                dim_reduction = PCA(n_components=self.comp)
            # fit cca according to train data only
            dim_reduction.fit(normalized_train, self.target[train_index])
            # convert source into lower dimensional representation
            normalized_train = dim_reduction.transform(normalized_train)
            normalized_test = dim_reduction.transform(normalized_test)
        else:
            _, wa, _ = tutorial_on_cca(normalized_train,
                                       self.target[train_index])
            normalized_train = normalized_train @ wa[:, :self.comp]
            normalized_test = normalized_test @ wa[:, :self.comp]
    model = self.build_model()
    model.fit(normalized_train, self.target[train_index])
    prediction = model.predict(normalized_test)
    # res_df.to_csv(f"{self.out_name}/res1.csv")
    if not ignore_eval:
        return self.evaluate_regression(prediction, test_index)
    else:
        return prediction
def __init__(self,
             model_name,
             model_type,
             n_clusters=None,
             n_components=None,
             n_lag=None,
             regularisation=None):
    self.n_lag = n_lag
    self.model_name = model_name
    self.model_type = model_type
    # self.n_clusters = n_clusters
    # self.clustering = NeuronClustering(self.n_clusters, signal_correlation)
    if model_name == 'cca':
        self.n_components = n_components
        self.model = CCA(n_components=self.n_components)
    elif model_name == 'linear-regression':
        if regularisation is None:
            self.model = LinearRegression()
        elif regularisation == 'l1':
            self.model = Lasso()
        elif regularisation == 'l2':
            self.model = Ridge()
        elif regularisation == 'l1l2':
            self.model = ElasticNet()
    else:
        raise NotImplementedError
def test_cca():
    """Test CCA."""
    # Compare results with Matlab
    # x = np.random.randn(1000, 11)
    # y = np.random.randn(1000, 9)
    # x = demean(x).squeeze()
    # y = demean(y).squeeze()
    mat = loadmat('./tests/data/ccadata.mat')
    x = mat['x']
    y = mat['y']
    A2 = mat['A2']
    B2 = mat['B2']

    A1, B1, R = nt_cca(x, y)  # if mean(A1(:).*A2(:))<0; A2=-A2; end
    X1 = np.dot(x, A1)
    Y1 = np.dot(y, B1)
    C1 = tscov(np.hstack((X1, Y1)))[0]

    # Sklearn CCA
    cca = CCA(n_components=9, scale=False, max_iter=1e6)
    X2, Y2 = cca.fit_transform(x, y)
    # C2 = tscov(np.hstack((X2, Y2)).T)[0]

    # import matplotlib.pyplot as plt
    # f, (ax1, ax2) = plt.subplots(2, 1)
    # ax1.imshow(C1)
    # ax2.imshow(C2)
    # plt.show()
    # assert_almost_equal(C1, C2, decimal=4)

    # Compare with matlab
    X2 = np.dot(x, A2)
    Y2 = np.dot(y, B2)
    C2 = tscov(np.hstack((X2, Y2)))[0]
    assert_almost_equal(C1, C2)
def compute_SVCCA(activation1, activation2):
    '''
    activation1 - Activation array 1 as a numpy array of size n X m1
    activation2 - Activation array 2 as a numpy array of size n X m2
    '''
    pca_r = 40  # value from Shi et al NeurIPS 2019
    n = activation1.shape[0]
    assert n == activation2.shape[0], "Size of activation arrays are different!!"
    if pca_r > activation1.shape[1]:
        print("Activation 1 array has less neurons.. changing number of PCs to ",
              activation1.shape[1])
        pca_r = activation1.shape[1]
    if pca_r > activation2.shape[1]:
        print("Activation 2 array has less neurons.. changing number of PCs to ",
              activation2.shape[1])
        pca_r = activation2.shape[1]

    pca1 = PCA(n_components=pca_r)
    red_activation1 = pca1.fit_transform(activation1)
    pca2 = PCA(n_components=pca_r)
    red_activation2 = pca2.fit_transform(activation2)

    cca = CCA(n_components=pca_r)
    red_activation1_c, red_activation2_c = cca.fit_transform(
        red_activation1, red_activation2)

    corr_values = np.zeros(pca_r)
    for idx in range(pca_r):
        corr_values[idx] = np.corrcoef(
            red_activation1_c[:, idx],
            red_activation2_c[:, idx])[0, 1]  # get the off-diagonal element
    return np.mean(corr_values)
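
# Hedged usage sketch (not part of the original source): compares two random
# activation matrices with compute_SVCCA. Assumes numpy (np), PCA and CCA are
# imported as in the snippet's module; the sizes below are arbitrary and the
# variable names are made up for illustration.
if __name__ == "__main__":
    acts_a = np.random.randn(300, 64)    # n samples x m1 neurons
    acts_b = np.random.randn(300, 128)   # n samples x m2 neurons
    print("mean SVCCA correlation:", compute_SVCCA(acts_a, acts_b))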
def perform(arrs):
    blocks_cnt = sum([arr.shape[1] * arr.shape[2] for arr in arrs])
    X = np.zeros((blocks_cnt, 16))
    Y = np.zeros((blocks_cnt, 64))
    for c in range(3):
        for i, arr in enumerate(arrs):
            height = arr.shape[1]
            width = arr.shape[2]
            for y in range(height):
                for x in range(width):
                    X[y * width + x] = np.hstack(
                        [arr[c][y][x - 1][:][-1], arr[c][y - 1][x][-1][:]])
                    Y[y * width + x] = arr[c][y][x].ravel()

        X_mc = (X - X.mean()) / (X.std())
        Y_mc = (Y - Y.mean()) / (Y.std())

        ca = CCA(n_components=1)
        ca.fit(X_mc, Y_mc)

        print(f'\nColor {c}:')
        weights = ca.x_weights_.ravel()
        print(weights.shape)
        print(', '.join(map(lambda a: str(a), weights)))
        print(ca.n_iter_)
def cca_classify(X_eeg_signals, Yi_frequency_signals):
    cca = CCA(1)
    corr_results = []
    for fr in range(0, Yi_frequency_signals.shape[0]):
        X = X_eeg_signals
        Yi = Yi_frequency_signals[fr, :, :]
        # compute the correlation between X and Yi
        cca.fit(X.T, np.squeeze(Yi).T)
        X_train_r, Yi_train_r = cca.transform(X.T, np.squeeze(Yi).T)
        corr = np.corrcoef(X_train_r[:, 0], Yi_train_r[:, 0])[0, 1]
        # collect the correlation of X with each Yi
        corr_results.append(corr)
    if corr_results[np.argmax(corr_results)] > 0.50:  # decision threshold
        global index
        global all_data
        classify_result = np.argmax(corr_results) + 1
        print(corr_results)
        index += 1
        # save the data
        TT = pd.DataFrame(X_eeg_signals)
        all_data = all_data.append(np.transpose(TT[1:9]))
        if index == 50:
            # save the data
            all_data = pd.DataFrame(all_data)
            all_data.to_csv('./j_8_all_data.csv', index=False)
        return classify_result
    else:
        return -1
def Initialize(self):
    self.configs = self.Config[0]
    self.Channellist = self.configs['Channellist']
    self.Trigger = {'state': 0, 'code': 0}  # register the trigger
    self.eeg = np.empty(0)
    self.trigger = np.empty(0)
    # self.mudname = rootdir+'/src/mud/MUD.mat'
    # self.sigparm = sio.loadmat(self.mudname)
    # self.FilterLR = self.sigparm['FilterLR']
    n = 3
    MdB = 20
    bprange = np.array([6.0, 35.0])
    Ws = bprange / (self.configs['SamplingRate'] / 2)
    self.b, self.a = scipy.signal.iirfilter(n, Ws, rs=MdB,
                                            ftype='cheby2')  # Hd_Bandpass
    self.prodata = np.empty(0)
    self.frequency = [9, 11.7, 14.5]
    t = np.arange(0.005, 5.5, 0.005)
    self.Y = {}
    for i in range(len(self.frequency)):
        y = np.array([
            np.sin(2 * np.pi * self.frequency[i] * t),
            np.cos(2 * np.pi * self.frequency[i] * t),
            np.sin(4 * np.pi * self.frequency[i] * t),
            np.cos(4 * np.pi * self.frequency[i] * t),
            np.sin(6 * np.pi * self.frequency[i] * t),
            np.cos(6 * np.pi * self.frequency[i] * t)
        ])
        self.Y[str(i)] = y
    self.rank_min = min(len(self.Channellist), self.Y[str(0)].shape[0])
    self.cca = CCA(n_components=self.rank_min)
    self.score_threshold = 0.25
    ##### load .mat
def doCCA(metrics, color):
    inp = np.array([metrics[m] for m in metricsInput2]).T.astype(float)
    out = np.array([metrics[m] for m in metricsOutput2]).T.astype(float)
    inp0 = np.zeros(len(metricsInput2))
    out0 = np.zeros(len(metricsOutput2))
    inp = np.vstack((inp, inp0))
    out = np.vstack((out, out0))

    cca = CCA(n_components=1, scale=False)
    cca.fit(inp, out)
    inp_cca = inp.dot(cca.x_weights_)
    out_cca = out.dot(cca.y_weights_)

    # Create linear regression object
    regr = linear_model.LinearRegression()
    # Train the model using the training sets
    regr.fit(inp_cca, out_cca)
    cca_regr = regr.predict(inp_cca)
    # The coefficients
    print('Coefficients: \n', regr.coef_)

    plt.scatter(inp_cca, out_cca, c=color)
    plt.plot(inp_cca, cca_regr, color=color, linewidth=0.5)

    logging.info('cca')
    logging.info(cca.x_rotations_)
    logging.info(cca.y_rotations_)
def distance(s1, s2, type="dist", x=None):
    # TODO: type check
    if (type == "dist"):
        # projection matrices P = S (S^T S)^{-1} S^T
        mat_1 = np.matmul(
            np.matmul(s1, np.linalg.inv(np.matmul(np.transpose(s1), s1))),
            np.transpose(s1))
        mat_2 = np.matmul(
            np.matmul(s2, np.linalg.inv(np.matmul(np.transpose(s2), s2))),
            np.transpose(s2))
        # Frobenius norm of the difference between the projection matrices
        return math.sqrt(np.sum(np.subtract(mat_1, mat_2)**2))
    if (type == "trace"):
        mat_1 = np.matmul(
            np.matmul(s1, np.linalg.inv(np.matmul(np.transpose(s1), s1))),
            np.transpose(s1))
        mat_2 = np.matmul(
            np.matmul(s2, np.linalg.inv(np.matmul(np.transpose(s2), s2))),
            np.transpose(s2))
        return np.sum(np.diag(np.matmul(mat_1, mat_2))) / helper.n_col(s1)
    if (type == "canonical"):
        if x is None:
            raise Exception("x must be specified if use type = 'canonical'")
        if (helper.n_col(x) != helper.n_row(s1)):
            raise Exception("Dimension of x is not correct.")
        cca = CCA(n_components=1)
        # fit_transform returns the canonical variates; their correlation is
        # the first canonical correlation (the fitted estimator itself cannot
        # be averaged)
        u, v = cca.fit_transform(np.matmul(x, s1), np.matmul(x, s2))
        return np.corrcoef(u[:, 0], v[:, 0])[0, 1]
def __init__(self):
    # algorithm name
    self._name = 'cca'

    # base path
    self._f_path = os.path.abspath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))

    # suppress warning messages
    warnings.filterwarnings('ignore')

    # load the raw data
    data = pd.read_csv(self._f_path +
                       "/regression/resource/regression_sample.csv",
                       sep=",",
                       encoding="utf-8")

    # split into training and test data
    self._x = (data["year"] <= 2017)
    self._y = (data["year"] >= 2018)

    # prepare the training data
    self._x_train, self._y_train = self.preprocessing(data[self._x])

    # prepare the test data
    self._x_test, self._y_test = self.preprocessing(data[self._y])

    # declare the model
    self._model = CCA()

    # train the model
    self._model.fit(self._x_train, self._y_train)
def train_eval(self, train_index, test_index):
    train_source, test_source = self.source[train_index], self.source[test_index]
    train_target, test_target = self.target[train_index], self.target[test_index]
    train_source, test_source = scale_train_test(train_source, test_source)
    train_target, _ = scale_train_test(train_target, test_target)
    # rho, w_t, w_s, _ = evaluate_cca_wa_wb(train_target, train_source)
    cca = CCA(n_components=min(train_source.shape[1], train_target.shape[1]),
              max_iter=1000)
    cca.fit(train_source, train_target)
    w_s = cca.x_rotations_
    w_t = cca.y_rotations_
    predicted_target = test_source @ w_s @ np.linalg.pinv(w_t)
    predicted_target = unscale_prediction(train_target, predicted_target)
    if self.target_encoder is not None:
        test_target = self.original_target[test_index]
        predicted_target = self.target_encoder.decode(
            torch.as_tensor(predicted_target)).detach().numpy()
    scores = np.zeros(self.original_target.shape[1])
    for i in range(self.original_target.shape[1]):
        predicted = predicted_target[:, i]
        actual = test_target[:, i]
        r, pval = pearsonr(predicted, actual)
        scores[i] = r
    return scores
def rdc1(x, y, k=10, s=0.2):
    if len(x.shape) == 1:
        x = x.reshape((-1, 1))
    if len(y.shape) == 1:
        y = y.reshape((-1, 1))

    # copula transform: replace each column by its normalised ranks
    cx = np.column_stack([rankdata(xc, method='ordinal')
                          for xc in x.T]) / float(x.size)
    cy = np.column_stack([rankdata(yc, method='ordinal')
                          for yc in y.T]) / float(y.size)

    # Add a vector of ones so that w.x + b is just a dot product
    O = np.ones(cx.shape[0])
    X = np.column_stack([cx, O])
    Y = np.column_stack([cy, O])

    # random linear projections followed by a sine non-linearity
    Rx = (s / X.shape[1]) * np.random.randn(X.shape[1], k)
    Ry = (s / Y.shape[1]) * np.random.randn(Y.shape[1], k)
    X = np.dot(X, Rx)
    Y = np.dot(Y, Ry)
    X = np.sin(X)
    Y = np.sin(Y)

    # largest canonical correlation of the projected features
    cca = CCA(n_components=1)
    xc, yc = cca.fit_transform(X, Y)
    result = np.corrcoef(xc.T, yc.T)[0, 1]
    print(result)
    return result
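
# Hedged usage sketch (not part of the original source): estimates the RDC
# between two non-linearly dependent variables. Assumes numpy (np),
# scipy.stats.rankdata and CCA are imported as in the snippet's module; the
# data and names below are made up for this example.
if __name__ == "__main__":
    rng_demo = np.random.RandomState(0)
    a = rng_demo.randn(500)
    b = np.abs(a) + 0.1 * rng_demo.randn(500)  # dependence a linear test would miss
    rdc1(a, b)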
def visualize_with_cca(X, y, title):
    cca = CCA(n_components=2)
    cca.fit(X, y)
    X_cca = cca.transform(X)
    Xax = X_cca[:, 0]
    Yax = X_cca[:, 1]
    labels = (y > 0).astype(int)
    cdict = {0: 'red', 1: 'green'}
    labl = {0: 'home_loss', 1: 'home_win'}
    marker = {0: '*', 1: 'o'}
    alpha = {0: .3, 1: .5}
    fig, ax = plt.subplots(figsize=(7, 5))
    fig.patch.set_facecolor('white')
    for l in np.unique(labels):
        ix = np.where(labels == l)
        ax.scatter(Xax[ix],
                   Yax[ix],
                   c=cdict[l],
                   s=40,
                   label=labl[l],
                   marker=marker[l],
                   alpha=alpha[l])
    plt.xlabel("First Canonical Component", fontsize=14)
    plt.ylabel("Second Canonical Component", fontsize=14)
    plt.legend()
    plt.title(title)
    plt.show()
def find_correlation_cca_method1(signal, reference_signals, n_components=2):
    r"""
    Perform canonical correlation analysis (CCA)
    Reference: https://github.com/aaravindravi/Brain-computer-interfaces/blob/master/notebook_12_class_cca.ipynb

    Args:
        signal : ndarray, shape (channel,time)
            Input signal in time domain
        reference_signals : ndarray, shape (len(flick_freq),2*num_harmonics,time)
            Required sinusoidal reference templates corresponding to the flicker frequency for SSVEP classification
        n_components : int, default: 2
            number of components to keep (for sklearn.cross_decomposition.CCA)
    Returns:
        result : array, size: len(flick_freq)
            Probability for each reference signals
    Dependencies:
        CCA : sklearn.cross_decomposition.CCA
        np : numpy package
    """
    cca = CCA(n_components)
    corr = np.zeros(n_components)
    result = np.zeros(reference_signals.shape[0])
    for freq_idx in range(0, reference_signals.shape[0]):
        cca_x = signal.T
        cca_y = np.squeeze(reference_signals[freq_idx, :, :]).T
        cca.fit(cca_x, cca_y)
        a, b = cca.transform(cca_x, cca_y)
        for ind_val in range(0, n_components):
            corr[ind_val] = np.corrcoef(a[:, ind_val], b[:, ind_val])[0, 1]
        result[freq_idx] = np.max(corr)
    return result
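
# Hedged usage sketch (not part of the original source): builds sinusoidal
# reference templates for two hypothetical flicker frequencies and scores one
# random 8-channel epoch. Assumes numpy (np) and CCA are imported as in the
# snippet's module; sampling rate, duration, frequencies and names are
# assumptions made only for this example.
if __name__ == "__main__":
    fs_demo, n_samples, n_harmonics = 250, 1000, 2
    t_demo = np.arange(n_samples) / fs_demo
    freqs_demo = [8.0, 13.0]
    refs_demo = np.stack([
        np.vstack([f(2 * np.pi * h * fr * t_demo)
                   for h in range(1, n_harmonics + 1)
                   for f in (np.sin, np.cos)])
        for fr in freqs_demo
    ])  # shape: (len(freqs), 2*num_harmonics, time)
    epoch_demo = np.random.randn(8, n_samples)  # (channel, time)
    print(find_correlation_cca_method1(epoch_demo, refs_demo, n_components=2))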
def CCA_project_vectors(args,
                        src_dico,
                        tgt_dico,
                        src_full,
                        tgt_full,
                        src_train,
                        tgt_train,
                        NUM_dim=100):
    print('Exporting embeddings...')
    OutputDir = "output/{}-{}/".format(args.src_lang, args.tgt_lang)
    if not os.path.exists(OutputDir):
        os.makedirs(OutputDir)

    cca = CCA(n_components=NUM_dim)
    print("Fitting...")
    cca.fit(src_train, tgt_train)
    print(cca.get_params())
    X_c, Y_c = cca.transform(src_full, tgt_full)
    src_out, tgt_out = utils.norm_embeddings(X_c), utils.norm_embeddings(Y_c)

    print("Exporting embeddings...")
    utils.export_embeddings(src_dico[0], src_out,
                            OutputDir + 'projected.{}'.format(args.src_lang))
    utils.export_embeddings(tgt_dico[0], tgt_out,
                            OutputDir + 'projected.{}'.format(args.tgt_lang))
    print("work over!")
def __init__(self, controller=None, num_targets=8, num_seconds=3):
    self.controller = controller
    self.num_targets = num_targets
    self.num_seconds = num_seconds  # number of seconds for a stimulus cycle
    self.sampling_rate = 128.0

    # frequencies calculated by frames/len(array) as seen in flicker_patterns.txt
    # or frequencies are set from paper we are basing our experiment from
    # check for either 4 or 8 targets
    if self.num_targets == 8:
        self.frequencies = np.asarray(
            [43.0, 37.0, 29.0, 21.0, 17.0, 11.0, 8.0, 5.0])
    elif self.num_targets == 4:
        # up, down, right, left from paper
        # NOTE: using old csv files with these new frequencies is not valid
        self.frequencies = [15.0, 12.0, 8.57, 5.45]
    else:
        print("cca did not get a good target number.")

    # prediction should be targets 1 to num_targets, not 0 to num_targets - 1
    # based on command_to_keyboard action
    self.prediction = None
    self.ref_signals = []
    self.getAllReferenceSignals()
    self.cca = CCA(n_components=1)
    self.fig = None
    self.ax = None
    self.plotter = None
    self.filter_obj = Filter()
def plot_subfigure(X, Y, subplot, title, transform):
    if transform == "pca":
        X = PCA(n_components=2).fit_transform(X)
    elif transform == "cca":
        X = CCA(n_components=2).fit(X[0:len(Y), :], Y).transform(X)
    else:
        raise ValueError

    min_x = np.min(X[:, 0])
    max_x = np.max(X[:, 0])

    min_y = np.min(X[:, 1])
    max_y = np.max(X[:, 1])

    plt.subplot(1, 2, subplot)
    plt.title(title)

    zero_class = np.where(Y[:, 0])
    one_class = np.where(Y[:, 1])
    two_class = np.where(Y[:, 2])
    three_class = np.where(Y[:, 3])
    plt.scatter(X[:, 0], X[:, 1], s=40, c='gray')
    plt.scatter(X[zero_class, 0],
                X[zero_class, 1],
                s=160,
                edgecolors='b',
                facecolors='none',
                linewidths=2,
                label='Class 1')
def map_spaces(self, algo, src_mapped_embed=None, trg_mapped_embed=None):
    # (There may be duplicates in self.shared_vocab_src and/or self.shared_vocab_trg,
    # swap_vocab can be used to only inspect one-to-one translations)
    src_embed = self.model_src[self.shared_vocab_src]
    trg_embed = self.model_trg[self.shared_vocab_trg]
    os.makedirs(algo, exist_ok=True)

    if algo == "procrustes":
        logging.info(
            "Calculating Rotation Matrix (Procrustes Problem) and applying it to first embedding"
        )
        #ortho, _ = orthogonal_procrustes(src_embed, trg_embed)  # does the same as
        u, _, vt = np.linalg.svd(trg_embed.T.dot(src_embed))
        w = vt.T.dot(u.T)
        self.model_src.vectors.dot(w, out=self.model_src.vectors)
    elif algo == "noise":
        logging.info(
            "Calculating Rotation Matrix with noise aware algorithm and applying it to first embedding"
        )
        transform_matrix, alpha, clean_indices, noisy_indices = noise_aware(
            src_embed, trg_embed)
        # write cleaned vocab to file
        with open("vocab.clean.txt", 'w') as v:
            for src, trg in np.asarray(self.shared_vocab)[clean_indices]:
                v.write("{}\t{}\n".format(src, trg))
        self.model_src.vectors.dot(transform_matrix,
                                   out=self.model_src.vectors)
        logging.info("Percentage of clean indices: {}".format(alpha))
    elif algo == "cca":
        logging.info(
            "Calculating Mapping based on CCA and applying it to both embeddings"
        )
        cca = CCA(n_components=100, max_iter=5000)
        cca.fit(src_embed, trg_embed)
        self.model_src.vectors, self.model_trg.vectors = cca.transform(
            self.model_src.vectors, self.model_trg.vectors)
    elif algo == "gcca":
        logging.info(
            "Calculating Mapping based on GCCA and applying it to both embeddings"
        )
        gcca = GCCA()
        gcca.fit([src_embed, trg_embed])
        transform_l = gcca.transform_as_list(
            (self.model_src.vectors, self.model_trg.vectors))
        # gcca computes positive and negative correlations (eigenvalues), sorted in ascending order.
        # We are only interested in the positive portion
        self.model_src.vectors = transform_l[0][:, 100:]
        self.model_trg.vectors = transform_l[1][:, 100:]

    # save transformed model(s)
    if src_mapped_embed:
        self.model_src.save(os.path.join(algo, src_mapped_embed))
    if trg_mapped_embed:
        self.model_trg.save(os.path.join(algo, trg_mapped_embed))
def rdc_cca(indexes):
    i, j, rdc_features = indexes
    cca = CCA(n_components=1, max_iter=CCA_MAX_ITER)
    X_cca, Y_cca = cca.fit_transform(rdc_features[i], rdc_features[j])
    rdc = np.corrcoef(X_cca.T, Y_cca.T)[0, 1]
    # logger.info(i, j, rdc)
    return rdc
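
# Hedged usage sketch (not part of the original source): rdc_cca expects a
# tuple (i, j, rdc_features) where rdc_features is a list of per-variable
# feature matrices (e.g. sine-transformed random projections), plus a
# module-level CCA_MAX_ITER. Assumes numpy (np) and CCA are imported as in the
# snippet's module; every name, shape and value below is an assumption.
if __name__ == "__main__":
    CCA_MAX_ITER = 200  # only needed if the module does not already define it
    rng_demo = np.random.RandomState(0)
    base = rng_demo.randn(500, 8)
    feats_demo = [np.sin(base), np.sin(base + 0.1 * rng_demo.randn(500, 8))]
    print("pairwise RDC:", rdc_cca((0, 1, feats_demo)))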
def new_cca_ds(A, B, n_components=1):
    # http://onlinelibrary.wiley.com/doi/10.1002/cem.2637/abstract
    model = CCA(n_components=n_components, scale=False).fit(B, A)
    F1 = np.linalg.pinv(model.x_scores_).dot(model.y_scores_)
    F2 = np.linalg.pinv(model.y_scores_).dot(A)
    P = ct.multi_dot((model.x_weights_, F1, F2))
    return P, B.dot(P)
def fit_cca(self, outfile=''):
    # fits linear CCA constraint and replaces pretrained name embeddings with CCA transformed embeddings
    self.load_embeddings()
    self.extract_pretrained_prototype_embeddings()

    items, vectors = zip(
        *[(k, v) for k, v in self.pretrained_prototype_embeddings.items()
          if k in self.exemplar_to_concept])
    concept_embs = Reach(vectors, items)

    train_vectors = []
    for x in items:
        train_vectors.append(self.train_embeddings[x])
    train_vectors = Reach.normalize(train_vectors)

    cca = CCA(n_components=self.train_embeddings.size, max_iter=10000)
    cca.fit(train_vectors, concept_embs.norm_vectors)

    # transform all name embeddings using the CCA mapping
    all_name_embeddings = deepcopy(self.pretrained_name_embeddings)
    items = [x for _, x in sorted(all_name_embeddings.indices.items())]
    projected_name_embeddings = cca.transform(all_name_embeddings.norm_vectors)
    new_name_embeddings = Reach(projected_name_embeddings, items)
    self.pretrained_name_embeddings = new_name_embeddings
    self.load_embeddings()

    if outfile:
        with open('{}_cca.p'.format(outfile), 'wb') as f:
            pickle.dump(cca, f)
def get_cca(X, Y, n_comp=10):
    cca = CCA(n_components=n_comp)
    print("X.shape", X.shape)
    print("Y.shape", Y.shape)
    x_scores, y_scores = cca.fit_transform(X, Y)

    # Manual Transform
    X -= cca.x_mean_
    X /= cca.x_std_
    Y -= cca.y_mean_
    Y /= cca.y_std_
    calc_scores_x = np.dot(X, cca.x_rotations_)
    calc_scores_y = np.dot(Y, cca.y_rotations_)

    # id_x = cca.x_rotations_ @ linalg.pinv2(cca.x_rotations_)
    # id_y = cca.y_rotations_ @ linalg.pinv2(cca.y_rotations_)

    print("x_scores.shape", x_scores.shape)
    print("y_scores.shape", y_scores.shape)

    correlations = np.diag(
        np.corrcoef(x_scores, y_scores, rowvar=False)[:n_comp, n_comp:])
    calc_correlations = np.diag(
        np.corrcoef(calc_scores_x, calc_scores_y,
                    rowvar=False)[:n_comp, n_comp:])
    print(correlations)
    print(calc_correlations)
    return x_scores, y_scores
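
# Hedged usage sketch (not part of the original source): runs get_cca on two
# random, correlated views; copies are passed because the function modifies X
# and Y in place. Assumes numpy (np) and CCA are imported as in the snippet's
# module and a scikit-learn version where x_mean_/x_std_ are still public
# attributes; the data, seed and names below are made up for this example.
if __name__ == "__main__":
    rng_demo = np.random.RandomState(0)
    view_x = rng_demo.randn(200, 15)
    view_y = 0.5 * view_x[:, :12] + rng_demo.randn(200, 12)
    xs, ys = get_cca(view_x.copy(), view_y.copy(), n_comp=5)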
def main(args):
    (training_file, label_file, test_file, u_file, e, c, output_file,
     components) = args
    X_training = load_feat(training_file)
    n = len(X_training)
    U = load_feat(u_file)
    y_training = [float(line.strip()) for line in open(label_file)]
    U = np.asarray(U)
    X_training = np.asarray(X_training)
    #X = preprocessing.normalize(X, norm='l2')
    y_training = np.asarray(y_training)

    X_test = load_feat(test_file)
    # test_label is assumed to be defined at module level
    y_test = [float(line.strip()) for line in open(test_label)]
    X_test = np.asarray(X_test)
    X_test[np.isnan(X_test)] = 0.0
    #test_X = preprocessing.normalize(test_X, norm='l2')
    y_test = np.asarray(y_test)

    s = min(len(X_training), len(U))
    cca = CCA(n_components=components, max_iter=50)
    (X_cca, U_cca) = cca.fit_transform(X_training[:s], U[:s])
    X_test_cca = cca.transform(X_test)

    svr = SVR(C=c, epsilon=e, kernel='rbf')
    svr.fit(X_cca, y_training[:s])
    pred = svr.predict(X_test_cca)
    with open(output_file, 'w') as output:
        for p in pred:
            print(p, file=output)
    return
def cca(m1, m2, preprocessing=None):
    """
    Use CCA to decompose two views and plot result.

    Params:
        m1, m2: Every column is a example with every row as a feature.
        preprocessing: If None, we don't do pre-processing; if 'orth',
            we adjust center to 0 and perform PCA.
    """
    # Adjust means to be 0 and perform PCA.
    if preprocessing == "orth":
        # Zero means.
        m1 -= np.mean(m1, axis=1, keepdims=True)
        # print("m1=", np.sum(m1, axis=1))
        m2 -= np.mean(m2, axis=1, keepdims=True)
        # PCA.

    cca = CCA(n_components=3, max_iter=100)
    cca.fit(m1.T, m2.T)
    X_c = cca.transform(m1.T)

    fig, ax = plt.subplots()
    ax.set_title('Fig.2.(c)')
    # ax.set_color_cycle(['blue', 'green', 'red'])
    ax.set_prop_cycle('color', ['blue', 'red', 'green'])
    ax.plot(X_c)
    # ax.plot(Y_c)
    plt.show()
def cca_coef(X, Y):
    '''
    Apply CCA method to compute inter_channel correlation
    :param X: data 1 (n_events, n_epochs, n_chans, n_times)
    :param Y: data 2 (actually equal to data 1)
    '''
    cca = CCA(n_components=1)
def fbcca(eeg, list_freqs, fs, num_harms=3, num_fbs=5):
    fb_coefs = np.power(np.arange(1, num_fbs + 1), (-1.25)) + 0.25

    # 40 targets (i.e. 40 frequency-phase combinations that we want to predict)
    num_targs, _, num_smpls = eeg.shape
    y_ref = cca_reference(list_freqs, fs, num_smpls, num_harms)
    cca = CCA(n_components=1)  # initialize CCA

    # result matrix
    r = np.zeros((num_fbs, num_targs))
    results = np.zeros(num_targs)

    for targ_i in range(num_targs):
        test_tmp = np.squeeze(eeg[targ_i, :, :])  # deal with one target at a time
        for fb_i in range(num_fbs):  # filter bank number, deal with different filter banks
            testdata = filterbank(test_tmp, fs, fb_i)  # data after filtering
            for class_i in range(num_targs):
                refdata = np.squeeze(y_ref[class_i, :, :])  # pick the corresponding freq target reference signal
                test_C, ref_C = cca.fit_transform(testdata.T, refdata.T)
                # len(row) = len(observation), len(column) = variables of each observation;
                # the number of rows must match, hence the transposes here.
                # The output is the highest-correlation linear combination of the two sets.
                r_tmp, _ = pearsonr(np.squeeze(test_C), np.squeeze(ref_C))
                # returns r and p_value; np.squeeze adapts the arrays to the API
                r[fb_i, class_i] = r_tmp
        rho = np.dot(fb_coefs, r)  # weighted sum of r across all filter banks
        tau = np.argmax(rho)  # the maximum over targets is the final prediction (take the index)
        results[targ_i] = tau  # the index indicates the most probable target
    return results