def runFactorAnalyzer(self, cols_to_norm, result):
    # assumes: from factor_analyzer import FactorAnalyzer, calculate_kmo;
    #          import pandas as pd; import numpy as np
    fa = FactorAnalyzer(rotation="varimax", n_factors=2)
    # drop NaN rows once, before slicing, so the factor scores stay aligned
    # with the Player/Session/Time columns taken from `result`
    result = result.dropna()
    df = result[cols_to_norm]
    fa.fit(df)
    ev = fa.get_eigenvalues()
    kmo_all, kmo_model = calculate_kmo(df)
    if kmo_model < 0.6:
        print("kmo_model: %s " % kmo_model)
    array = fa.transform(df)
    # print("Factors: %s" % (array))
    # print("loadings: %s " % fa.loadings_)
    # print("eigenvalues: %s " % ev[0])
    dataframe = pd.DataFrame(columns=[
        'Player', 'Session', 'Time', 'NegativeEmotion', 'PositiveEmotion'
    ])
    print("T session: %s " % len(result['Session']))
    dataframe['Session'] = result['Session']
    dataframe['Player'] = result['Player']
    dataframe['Time'] = result['ts']
    dataframe['NegativeEmotion'] = np.around(array[:, 0], 2)
    dataframe['PositiveEmotion'] = np.around(array[:, 1], 2)
    dataframe.to_csv('/home/elton/Desktop/Dataset/MetricsEmotion.csv',
                     sep=',', mode='a', header=False)
def test_analyze_rotation_value_error(self):
    data = pd.DataFrame({
        'A': [2, 4, 5, 6, 8, 9],
        'B': [4, 8, np.nan, 10, 16, 18],
        'C': [6, 12, 15, 12, 26, 27]
    })
    # 'blah' is not a valid rotation method, so fitting is expected to raise a ValueError
    fa = FactorAnalyzer(rotation='blah', n_factors=1)
    fa.fit(data)
def test_analyze_infinite(self):
    data = pd.DataFrame(
        {
            'A': [1.0, 0.4, 0.5],
            'B': [0.4, 1.0, float('inf')],
            'C': [0.5, float('inf'), 1.0]
        },
        index=['A', 'B', 'C'])
    fa = FactorAnalyzer(impute='drop', n_factors=1, is_corr_matrix=True)
    fa.fit(data)
def test_analyze_weights(self):
    data = pd.DataFrame({
        'A': [2, 4, 5, 6, 8, 9],
        'B': [4, 8, 9, 10, 16, 18],
        'C': [6, 12, 15, 12, 26, 27]
    })
    fa = FactorAnalyzer(rotation=None)
    fa.fit(data)
    _ = fa.transform(data)
    expected_weights = np.array([[0.33536334, -2.72509646, 0],
                                 [0.33916605, -0.29388849, 0],
                                 [0.33444588, 3.03060826, 0]])
    assert_array_almost_equal(expected_weights, fa.weights_)
def test_analyze_impute_drop(self):
    data = pd.DataFrame({
        'A': [2, 4, 5, 6, 8, 9],
        'B': [4, 8, np.nan, 10, 16, 18],
        'C': [6, 12, 15, 12, 26, 27]
    })
    expected = data.copy().dropna()
    expected_corr = expected.corr().values
    fa = FactorAnalyzer(rotation=None, impute='drop', n_factors=1)
    fa.fit(data)
    assert_array_almost_equal(fa.corr_, expected_corr)
def test_factor_variance(self):
    path = 'tests/data/test01.csv'
    data = pd.read_csv(path)
    fa = FactorAnalyzer(n_factors=3, rotation=None)
    fa.fit(data)
    loadings = fa.loadings_
    n_rows = loadings.shape[0]
    # calculate variance
    loadings = loadings**2
    variance = np.sum(loadings, axis=0)
    # calculate proportional variance
    proportional_variance_expected = variance / n_rows
    proportional_variance = fa.get_factor_variance()[1]
    assert_almost_equal(proportional_variance_expected, proportional_variance)
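# Possible extension of the test above (a sketch, not part of the original suite):
# get_factor_variance() returns (variance, proportional_variance, cumulative_variance),
# so the cumulative column can also be checked against the running sum of the proportions.
variance, proportional_variance, cumulative_variance = fa.get_factor_variance()
assert_almost_equal(np.cumsum(proportional_variance), cumulative_variance)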
def test_analyze_rotation_value_error(self):
    # 'blah' is not a valid rotation method, so fitting is expected to raise a ValueError
    fa = FactorAnalyzer(rotation='blah', n_factors=1)
    fa.fit(np.random.randn(500).reshape(100, 5))
def test_analyze_bad_svd_method(self):
    # 'foo' is not a supported SVD method, so fitting is expected to raise an error
    fa = FactorAnalyzer(svd_method='foo')
    fa.fit(np.random.randn(500).reshape(100, 5))
def get_factor_eigenvalues(df):
    fa = FactorAnalyzer(rotation=None)
    fa.fit(df)
    ev, v = fa.get_eigenvalues()
    return ev
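# Hedged usage sketch (names are illustrative, not from the original): the original
# eigenvalues returned above are commonly used for a scree plot and the Kaiser
# criterion (retain factors with eigenvalue > 1) when choosing n_factors.
import matplotlib.pyplot as plt

ev = get_factor_eigenvalues(df)      # df: any numeric DataFrame (assumed)
n_factors = int((ev > 1).sum())      # Kaiser criterion
plt.plot(range(1, len(ev) + 1), ev, marker='o')
plt.axhline(1.0, linestyle='--')
plt.xlabel('Factor')
plt.ylabel('Eigenvalue')
plt.show()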
if do_plot:
    sn.scatterplot(count_axis, pca.explained_variance_)
    plt.show()

# ----------------------------------------------------------------------------
# Explore 2: 2D plot of all individuals using the 2D PCA vs 2D Factor analysis
# ----------------------------------------------------------------------------
# PCA 2d
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_scaled)
plotIn2D(X_train_pca, 'principal component', 1)

# FA 2d
fa = FactorAnalyzer(rotation=None, n_factors=2)
fa.fit(X_train_scaled)
X_train_fa = fa.transform(X_train_scaled)
plotIn2D(X_train_fa, 'Factor analysis', 2)

# The following call shows the two 2D graphs, PCA and FA.
# We notice that the distributions of the individuals are very similar in both.
# The 2D plane allows a clear separation of benign from malignant cases.
plt.show()

# X_test_fa = fa.transform(X_test_scaled)
# print("X_train_fa")
# print(X_train_fa)

# Method 1: Logistic regression
# -----------------------------
logistic_regression = LogisticRegression()
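# Minimal continuation sketch, assuming X_test_scaled (as in the commented-out line
# above) plus y_train and y_test exist; these names are assumptions, not from the
# source. It fits the logistic regression on the 2-factor scores and scores the test set.
X_test_fa = fa.transform(X_test_scaled)
logistic_regression.fit(X_train_fa, y_train)
print("Test accuracy on FA scores: %.3f" % logistic_regression.score(X_test_fa, y_test))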