Esempio n. 1
0
    def test_CCA(self):
        """Check that CCA driven through a ModelFrame matches ``cd.CCA``.

        The same data is fitted twice: once via the ModelFrame accessor and
        once directly with the ``cd.CCA`` estimator. The first weight column
        and the transformed data/target are then compared.
        """
        X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [3., 5., 4.]]
        Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
        df = pdml.ModelFrame(X, target=Y)

        wrapped = df.cross_decomposition.CCA(n_components=1)
        reference = cd.CCA(n_components=1)

        df.fit(wrapped)
        reference.fit(X, Y)

        # Only the first weight column is compared: the 2nd cols are
        # different on travis-CI.
        for weights_attr in ('x_weights_', 'y_weights_'):
            self.assert_numpy_array_almost_equal(
                getattr(wrapped, weights_attr)[:, 0],
                getattr(reference, weights_attr)[:, 0])

        result = df.transform(wrapped)
        expected_data, expected_target = reference.transform(X, Y)

        self.assertIsInstance(result, pdml.ModelFrame)
        # Both sides are flattened to length-4 vectors before comparing.
        actual_data = result.data.values.reshape(4)
        self.assert_numpy_array_almost_equal(actual_data,
                                             expected_data.reshape(4))
        actual_target = result.target.values.reshape(4)
        self.assert_numpy_array_almost_equal(actual_target,
                                             expected_target.reshape(4))
Esempio n. 2
0
    def __init__(self, X):
        """Run a canonical correlation analysis on two column sets of ``X``.

        The user is prompted on stdin for the position that splits the
        columns of ``X`` (all except the first one) into two sets. A CCA
        model with ``min(m, p)`` components is fitted on the two sets, the
        loadings, scores and per-component correlations are stored on the
        instance, and the Bartlett-Wilks results are tabulated and plotted.

        Args:
            X: table exposing ``.columns`` and column-list indexing with
                ``.values`` (presumably a pandas DataFrame -- confirm).

        Raises:
            ValueError: if the typed delimiter is not an integer.
        """
        self.X = X

        aux = input("Enter delimiting index between datasets: ")
        # The previous check, isinstance(int(aux), int), could never fail:
        # int() either returns an int or raises on its own. Convert once and
        # surface the intended message for non-integer input.
        try:
            x = int(aux)
        except ValueError:
            raise ValueError('Please input only integer values.') from None

        # The first column is skipped; the rest is split at position x.
        columns = X.columns[1:]
        set1_columns = columns[:x]
        set2_columns = columns[x:]
        self.set1 = X[set1_columns].values
        self.set2 = X[set2_columns].values
        # write them to files in ada

        n = self.set1.shape[0]  # number of rows
        m = self.set1.shape[1]  # number of columns in the first set
        p = np.shape(self.set2)[1]  # number of columns in the second set
        self.noComponents = min(m, p)

        self.cca = skl.CCA(n_components=self.noComponents)
        self.cca.fit(self.set1, self.set2)

        # Factor loadings for both sets
        self.x_loadings_ = self.cca.x_loadings_
        self.y_loadings_ = self.cca.y_loadings_

        # Canonical scores for both sets
        self.x_scores = self.cca.x_scores_  # when written to file, separate values with "," to obtain a csv format
        self.y_scores = self.cca.y_scores_  # when written to file, separate values with "," to obtain a csv format

        # Canonical correlation coefficients: correlation between the i-th
        # score column of each set.
        self.correlCoeffs = np.array([
            np.corrcoef(self.x_scores[:, i], self.y_scores[:, i],
                        rowvar=False)[0, 1] for i in range(self.noComponents)
        ])

        chi2_computed, chi2_estimated = st.bartlett_wilks(
            self.correlCoeffs, n, m, p, self.noComponents)

        self.chi2_computed_table = pd.DataFrame(
            chi2_computed,
            index=['r' + str(i) for i in range(1, self.noComponents + 1)],
            columns=['chi2_computed'])
        vis.correlogram(self.chi2_computed_table,
                        "Bartlett-Wilks significance test", 0)  # get in ada

        # NOTE(review): this index has m labels while the table above has
        # noComponents labels; if st.bartlett_wilks returns noComponents
        # values and m > p the lengths will not match -- confirm.
        self.chi2_estimated_table = pd.DataFrame(
            chi2_estimated,
            index=['r' + str(i) for i in range(1, m + 1)],
            columns=['chi2_estimated'])
        vis.correlogram(self.chi2_estimated_table,
                        "Bartlett-Wilks significance test", 0)  # get in ada
Esempio n. 3
0
    def fit(self):
        """Fit a CCA model on ``self.x_data`` and ``self.y_data``.

        Stores on the instance the row count ``n``, the column counts ``p``
        (x) and ``q`` (y), the component count ``m = min(p, q)``, the fitted
        ``model`` and its ``x_scores`` / ``y_scores``.

        Returns:
            The instance itself.
        """
        # Dimensions of the two data sets
        x_rows, x_cols = np.shape(self.x_data)
        self.n, self.p = x_rows, x_cols
        self.q = np.shape(self.y_data)[1]
        self.m = min(self.p, self.q)

        # Build and train the model with one component per possible pair
        cca = skl.CCA(n_components=self.m)
        self.model = cca
        cca.fit(self.x_data, self.y_data)

        # Canonical scores
        self.x_scores = cca.x_scores_  # z
        self.y_scores = cca.y_scores_  # u
        return self
Esempio n. 4
0
# Inner-join the two tables; overlapping column names receive the "_1" / "_2"
# suffixes. (Presumably pandas DataFrames joined on their index -- confirm.)
t = t_emisii.join(other=t_electricitate,
                  how="inner",
                  lsuffix="_1",
                  rsuffix="_2")

# Value arrays for the two variable sets (var1 / var2 are defined elsewhere).
x = t[var1].values
y = t[var2].values

# print("x:",x,"y:",y,sep="\n")
# Presumably replaces NaN values in place (the return value is ignored) --
# confirm against functii.inlocuire_nan.
functii.inlocuire_nan(x)
functii.inlocuire_nan(y)
# Build the canonical analysis model
# n = rows of x, p = columns of x, q = columns of y, m = component count
n, p = x.shape
q = y.shape[1]
m = min(p, q)
model_ac = cdec.CCA(n_components=m, scale=False)
model_ac.fit(x, y)
# Collect the results
# Scores
z = model_ac.x_scores_
u = model_ac.y_scores_
# Normalize the scores along axis 0; copy=False requests an in-place update
# and the return value is discarded.
pp.normalize(z, axis=0, copy=False)
pp.normalize(u, axis=0, copy=False)
print("z:", z, "u:", u, sep="\n")
# Canonical correlations: diagonal of the z-vs-u block of the joint
# correlation matrix, i.e. each z column against the matching u column.
r = np.diagonal(np.corrcoef(z, u, rowvar=False)[:m, m:])
print("Corelatii canonice:", r)
p_values = functii.test_bartlett_wilks(r, n, p, q, m)
print("Test Bartlett. P-Values:", p_values)

# Correlations between the q columns of y and the first two columns of u
# (the y-vs-u block of the joint correlation matrix).
ryu = np.corrcoef(y, u[:, :2], rowvar=False)[:q, q:]
Esempio n. 5
0
 def correlate_sklearn(self):
     """Fit a CCA estimator on the two view matrices and keep it.

     Uses ``self.k`` components and at most 1000 iterations; the fitted
     estimator is stored in ``self.model``.
     """
     print("CCA training...")
     estimator = cross_decomposition.CCA(n_components=self.k, max_iter=1000)
     view_pair = (self.first_view_matrix, self.second_view_matrix)
     estimator.fit(*view_pair)
     # Only publish the estimator once fitting has succeeded.
     self.model = estimator
Esempio n. 6
0
def performCCA(k, training_set, target_set, fileNameX, fileNameY):
    """Fit a k-component CCA and save both transformed sets as text files.

    Args:
        k: number of components passed to ``cd.CCA``.
        training_set: first data set given to ``fit_transform``.
        target_set: second data set given to ``fit_transform``.
        fileNameX: destination for the transformed training set.
        fileNameY: destination for the transformed target set.
    """
    model = cd.CCA(n_components=k)
    x_projected, y_projected = model.fit_transform(training_set, target_set)
    # Write the X projection first, then the Y projection.
    for destination, scores in ((fileNameX, x_projected),
                                (fileNameY, y_projected)):
        np.savetxt(destination, scores)
0
# Labels of the rows (instances), taken from the table index.
nume_instante = list(tabel.index)

# Get rid of null values.
# Presumably replaces NaN in place (return value is ignored) -- confirm
# against functii.inlocuire_nan.
functii.inlocuire_nan(x)
functii.inlocuire_nan(y)

#print(x, y, sep="\n")

# Build the CCA model -> Canonical Correlation Analysis
# Compute the number of canonical roots
n,p = np.shape(x)
q = y.shape[1]
m = min(p, q) # number of canonical roots

# Build the model
cca_model = sdec.CCA(m)
cca_model.fit(x, y)

# Collect results and run the computations
# Collect the scores
z = cca_model.x_scores_
u = cca_model.y_scores_

# The most significant axes are 0 and 1
#grafice.plot_scoruri(z[:, 0], z[:, 1], u[:, 0], u[:, 1], nume_instante)

# Compute the canonical correlations
r = np.diagonal(np.corrcoef(z, u, rowvar=False)[:m, m:]) # rowvar=False: columns are the variables; keep the main diagonal of the z-vs-u block
print(r) # -> the correlations

# Apply the Bartlett-Wilks test for significance of the canonical correlations
Esempio n. 8
0
import parsing as ps
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression

# NOTE(review): `hs`, `lg`, `pd` and `cross_decomposition` are used below but
# are not among the imports visible at the top of this snippet -- confirm they
# are imported elsewhere.
general_error = []
column_names = hs.column_names
# Feature tables: the "men" sample is used for fitting, the "women" sample
# for testing.
X_men = ps.data_to_ml[column_names]
X_women = ps.data_to_test[column_names]

# X_moments = stats.moment(X_men, axis=0, moment=2, nan_policy='omit')
# Y_moment = stats.moment(lg.Y_men, axis=0, moment=2, nan_policy='omit')
# X_ts_moments = stats.moment(X_women, axis=0, moment=2, nan_policy='omit')
# Y_ts_moments = stats.moment(lg.Y_women, axis=0, moment=2, nan_policy='omit')

# Fit a 2-component CCA on the training sample and project both X and Y.
CCA = cross_decomposition.CCA(n_components=2, max_iter=500)
CCA.fit(X_men, lg.Y_men)
X_c, Y_c = CCA.transform(X_men, lg.Y_men)
# Binarize the projected target: values <= 0 become 0, everything else 1.
# NOTE(review): with n_components=2, Y_c may have two columns, in which case
# `x` is a row and `x <= 0` is an element-wise comparison -- confirm the
# shape of Y_c.
Y_cca = [0 if x <= 0 else 1 for x in Y_c]
X_cca = pd.DataFrame(X_c)
Y_cca = pd.Series(Y_cca)
# Train a logistic regression on the projected features and binarized target.
logreg = LogisticRegression()
logreg.fit(X_cca, Y_cca)

# NOTE(review): the CCA is refit on the test sample here instead of reusing
# the projection learned on the training sample -- confirm this is intended.
CCA.fit(X_women, lg.Y_women)
X_ts_c, Y_ts_c = CCA.transform(X_women, lg.Y_women)
Y_ts_cca = [0 if x <= 0 else 1 for x in Y_ts_c]
X_ts_cca = pd.DataFrame(X_ts_c)
# Y_ts_cca = pd.Series(Y_ts_cca)

#  try model on test sample
Esempio n. 9
0
# Labels of the rows (instances), taken from the table index
nume_instante = list(t.index)

# Values of the two variable sets
x = t[var_x].values
y = t[var_y].values
# Number of rows and columns of x
n, p = x.shape
# Number of columns of y
q = y.shape[1]
# Smaller of the two column counts
m = min(p, q)
# print(n, p, q, m, x, y, sep="\n")

# Create the CCA model - Canonical Correlation Analysis
cca_model = cdec.CCA(m)
cca_model.fit(x, y)

# Extract the results
# Collect the scores
z = cca_model.x_scores_
u = cca_model.y_scores_
# Normalize the scores along axis 0; copy=False requests an in-place update
# and the return value is discarded.
pp.normalize(z, axis=0, copy=False)
pp.normalize(u, axis=0, copy=False)
# Compute the canonical correlations: diagonal of the z-vs-u block of the
# joint correlation matrix (each z column against the matching u column).

r = np.diagonal(np.corrcoef(z, u, rowvar=False)[:m, m:])
print("Corelatii canonice:", r)

# Compute the correlations between the observed variables and the canonical variables