def test_CCA(self):
    """Check that CCA run through a ModelFrame matches a directly fitted ``cd.CCA``.

    The same single-component model is fitted twice on a tiny fixed
    dataset: once via the ``pdml.ModelFrame`` accessor and once by calling
    ``cd.CCA`` directly. Weights and transformed scores are then compared.
    """
    features = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [3., 5., 4.]]
    targets = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    frame = pdml.ModelFrame(features, target=targets)

    wrapped = frame.cross_decomposition.CCA(n_components=1)
    reference = cd.CCA(n_components=1)
    frame.fit(wrapped)
    reference.fit(features, targets)

    # Only the first weight column is compared:
    # 2nd cols are different on travis-CI
    for attr in ("x_weights_", "y_weights_"):
        self.assert_numpy_array_almost_equal(
            getattr(wrapped, attr)[:, 0],
            getattr(reference, attr)[:, 0],
        )

    result = frame.transform(wrapped)
    expected_x, expected_y = reference.transform(features, targets)

    self.assertIsInstance(result, pdml.ModelFrame)
    # Four samples, one component each: flatten both sides before comparing.
    self.assert_numpy_array_almost_equal(
        result.data.values.reshape(4), expected_x.reshape(4))
    self.assert_numpy_array_almost_equal(
        result.target.values.reshape(4), expected_y.reshape(4))
def __init__(self, X):
    """Split ``X`` into two variable sets, fit a CCA and run the Bartlett-Wilks test.

    The user is prompted on stdin for the column index that separates the
    first variable set from the second. The first column of ``X`` is always
    skipped before splitting.

    Parameters:
        X: table exposing ``.columns`` and column-list indexing with
            ``.values`` (presumably a pandas DataFrame -- confirm with caller).

    Raises:
        ValueError: if the text typed at the prompt is not an integer.
    """
    self.X = X

    aux = input("Enter delimiting index between datasets: ")
    # The original guarded with isinstance(int(aux), int), which can never be
    # False: int() either succeeds or raises by itself. Convert once and turn
    # the failure into the message the check was meant to produce.
    try:
        x = int(aux)
    except ValueError as err:
        raise ValueError('Please input only integer values.') from err

    columns = X.columns[1:]
    set1_columns = columns[:x]
    set2_columns = columns[x:]
    self.set1 = X[set1_columns].values
    self.set2 = X[set2_columns].values  # write them to files in ada

    n, m = self.set1.shape
    p = np.shape(self.set2)[1]
    # At most min(m, p) canonical pairs can be extracted.
    self.noComponents = min(m, p)

    self.cca = skl.CCA(n_components=self.noComponents)
    self.cca.fit(self.set1, self.set2)

    # Factor loadings for both sets
    self.x_loadings_ = self.cca.x_loadings_
    self.y_loadings_ = self.cca.y_loadings_

    # Canonical scores for both sets
    # (when written to file, separate values with "," to obtain a csv format)
    self.x_scores = self.cca.x_scores_
    self.y_scores = self.cca.y_scores_

    # Canonical correlation coefficients: correlation between the i-th pair
    # of score columns.
    self.correlCoeffs = np.array([
        np.corrcoef(self.x_scores[:, i], self.y_scores[:, i], rowvar=False)[0, 1]
        for i in range(self.noComponents)
    ])

    chi2_computed, chi2_estimated = st.bartlett_wilks(
        self.correlCoeffs, n, m, p, self.noComponents)

    self.chi2_computed_table = pd.DataFrame(
        chi2_computed,
        index=['r' + str(i) for i in range(1, self.noComponents + 1)],
        columns=['chi2_computed'])
    vis.correlogram(self.chi2_computed_table,
                    "Bartlett-Wilks significance test", 0)  # get in ada

    # NOTE(review): this index has m labels while the table above has
    # noComponents labels; if st.bartlett_wilks returns noComponents values
    # and m > p, pandas will reject the shape -- confirm the helper's output.
    self.chi2_estimated_table = pd.DataFrame(
        chi2_estimated,
        index=['r' + str(i) for i in range(1, m + 1)],
        columns=['chi2_estimated'])
    vis.correlogram(self.chi2_estimated_table,
                    "Bartlett-Wilks significance test", 0)  # get in ada
def fit(self):
    """Fit a CCA on ``self.x_data`` / ``self.y_data`` and cache its scores.

    Stores the data dimensions (``n``, ``p``, ``q``), the number of
    components ``m = min(p, q)``, the fitted estimator (``model``) and the
    canonical scores (``x_scores``, ``y_scores``) on the instance.

    Returns:
        The instance itself, so calls can be chained.
    """
    # Dimensions: n observations, p columns in X, q columns in Y.
    n_rows, n_x_cols = np.shape(self.x_data)
    self.n = n_rows
    self.p = n_x_cols
    self.q = np.shape(self.y_data)[1]
    self.m = min(self.p, self.q)

    # Build and fit the estimator.
    estimator = skl.CCA(n_components=self.m)
    self.model = estimator
    estimator.fit(self.x_data, self.y_data)

    # Canonical scores (z for the X side, u for the Y side).
    self.x_scores = estimator.x_scores_
    self.y_scores = estimator.y_scores_
    return self
# Inner-join the two tables on their index; overlapping column names receive
# the "_1" / "_2" suffixes.
t = t_emisii.join(other=t_electricitate, how="inner", lsuffix="_1", rsuffix="_2")
x = t[var1].values
y = t[var2].values
# print("x:",x,"y:",y,sep="\n")

# Presumably replaces NaN values in place (the return value is not used) --
# confirm against functii.inlocuire_nan.
functii.inlocuire_nan(x)
functii.inlocuire_nan(y)

# Build the canonical analysis model
n, p = x.shape
q = y.shape[1]
m = min(p, q)  # number of canonical roots
model_ac = cdec.CCA(n_components=m, scale=False)
model_ac.fit(x, y)

# Collect the results
# Scores
z = model_ac.x_scores_
u = model_ac.y_scores_

# Normalise the scores column-wise. The result is bound explicitly because
# copy=False only *tries* to work in place; when a copy is made, discarding
# the return value would silently leave z / u un-normalised.
# (assumes pp is sklearn.preprocessing -- confirm)
z = pp.normalize(z, axis=0, copy=False)
u = pp.normalize(u, axis=0, copy=False)
print("z:", z, "u:", u, sep="\n")

# Canonical correlations: the diagonal of the z-vs-u block of the joint
# correlation matrix.
r = np.diagonal(np.corrcoef(z, u, rowvar=False)[:m, m:])
print("Corelatii canonice:", r)

p_values = functii.test_bartlett_wilks(r, n, p, q, m)
print("Test Bartlett. P-Values:", p_values)

# Correlations between the observed y variables and the first two canonical
# variables u.
ryu = np.corrcoef(y, u[:, :2], rowvar=False)[:q, q:]
def correlate_sklearn(self):
    """Fit a CCA with ``self.k`` components on the two view matrices.

    The fitted estimator is stored in ``self.model``; it is only assigned
    once fitting has completed.
    """
    print("CCA training...")
    estimator = cross_decomposition.CCA(max_iter=1000, n_components=self.k)
    views = (self.first_view_matrix, self.second_view_matrix)
    estimator.fit(*views)
    self.model = estimator
def performCCA(k, training_set, target_set, fileNameX, fileNameY):
    """Fit a ``k``-component CCA and write both projections to text files.

    Parameters:
        k: number of components passed to ``cd.CCA``.
        training_set: first data block handed to ``fit_transform``.
        target_set: second data block handed to ``fit_transform``.
        fileNameX: destination for the transformed ``training_set``.
        fileNameY: destination for the transformed ``target_set``.
    """
    projector = cd.CCA(n_components=k)
    x_projection, y_projection = projector.fit_transform(training_set, target_set)

    outputs = ((fileNameX, x_projection), (fileNameY, y_projection))
    for destination, scores in outputs:
        np.savetxt(destination, scores)
# Row labels of the input table.
nume_instante = list(tabel.index)

# Get rid of missing values.
functii.inlocuire_nan(x)
functii.inlocuire_nan(y)
# print(x, y, sep="\n")

# Build the CCA (Canonical Correlation Analysis) model.
# First work out the number of canonical roots.
n, p = np.shape(x)
q = np.shape(y)[1]
m = min(p, q)  # number of canonical roots

# Create and fit the model.
cca_model = sdec.CCA(n_components=m)
cca_model.fit(x, y)

# Collect results and derive further quantities.
# Scores
z = cca_model.x_scores_
u = cca_model.y_scores_

# The most significant axes are 0 and 1.
# grafice.plot_scoruri(z[:, 0], z[:, 1], u[:, 0], u[:, 1], nume_instante)

# Canonical correlations: variables are laid out in columns (rowvar=False),
# and the wanted values sit on the main diagonal of the z-vs-u block.
r = np.corrcoef(z, u, rowvar=False)[:m, m:].diagonal()
print(r)  # -> the correlations

# Apply the Bartlett-Wilks test for the significance of the canonical correlations
import parsing as ps
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression

general_error = []
column_names = hs.column_names
X_men = ps.data_to_ml[column_names]      # training sample
X_women = ps.data_to_test[column_names]  # held-out sample

# X_moments = stats.moment(X_men, axis=0, moment=2, nan_policy='omit')
# Y_moment = stats.moment(lg.Y_men, axis=0, moment=2, nan_policy='omit')
# X_ts_moments = stats.moment(X_women, axis=0, moment=2, nan_policy='omit')
# Y_ts_moments = stats.moment(lg.Y_women, axis=0, moment=2, nan_policy='omit')

# Fit the CCA on the training sample only and project it.
CCA = cross_decomposition.CCA(n_components=2, max_iter=500)
CCA.fit(X_men, lg.Y_men)
X_c, Y_c = CCA.transform(X_men, lg.Y_men)

# Binarise the projected target: non-positive scores -> 0, positive -> 1.
Y_cca = [0 if x <= 0 else 1 for x in Y_c]
X_cca = pd.DataFrame(X_c)
Y_cca = pd.Series(Y_cca)

logreg = LogisticRegression()
logreg.fit(X_cca, Y_cca)

# Project the held-out sample with the CCA fitted on the training sample.
# The original re-fitted the CCA on the held-out data first, which leaks test
# information and yields canonical axes unrelated to the ones the logistic
# regression was trained on, so its predictions would not be comparable.
X_ts_c, Y_ts_c = CCA.transform(X_women, lg.Y_women)
Y_ts_cca = [0 if x <= 0 else 1 for x in Y_ts_c]
X_ts_cca = pd.DataFrame(X_ts_c)
# Y_ts_cca = pd.Series(Y_ts_cca)

# try model on test sample
# Row labels of the input table
nume_instante = list(t.index)

# Values of the two variable sets
x = t[var_x].values
y = t[var_y].values

# Number of rows and columns of x
n, p = x.shape
# Number of columns of y
q = y.shape[1]
# Smaller column count of the two sets = number of canonical roots
m = min(p, q)
# print(n, p, q, m, x, y, sep="\n")

# Create the CCA (Canonical Correlation Analysis) model
cca_model = cdec.CCA(m)
cca_model.fit(x, y)

# Extract the results
# Scores
z = cca_model.x_scores_
u = cca_model.y_scores_

# Normalise the scores column-wise. The result is bound explicitly because
# copy=False only *tries* to work in place; when a copy is made, discarding
# the return value would silently leave z / u un-normalised.
# (assumes pp is sklearn.preprocessing -- confirm)
z = pp.normalize(z, axis=0, copy=False)
u = pp.normalize(u, axis=0, copy=False)

# Canonical correlations: the diagonal of the z-vs-u block of the joint
# correlation matrix.
r = np.diagonal(np.corrcoef(z, u, rowvar=False)[:m, m:])
print("Corelatii canonice:", r)

# Compute the correlations between the observed variables and the canonical variables