def scatter_2d(self) -> go.Figure:
    """Render the clustered data as a 2D scatter plot.

    Projects ``self.df`` onto two varimax-rotated maximum-likelihood
    factors and delegates the actual drawing to
    ``self._plot_scatter_2d``, colouring points by cluster label.

    Returns:
        go.Figure: the assembled Plotly figure.
    """
    analyzer = FactorAnalyzer(rotation='varimax', n_factors=2, method='ml')
    scores = analyzer.fit_transform(self.df)
    # Total explained variance as a percentage (pro_var holds fractions).
    explained_pct = self.pro_var.sum() * 100
    return self._plot_scatter_2d(scores, self.clustered_labels.cluster, explained_pct)
def FA(observied_variables, name):
    """Run exploratory factor analysis on a feature DataFrame.

    Prints Bartlett-sphericity and KMO adequacy diagnostics, fits a
    ``FactorAnalyzer`` (2 factors for ``'phone'``, 4 for ``'QOL'``),
    prints the eigenvalues, factor variances and loadings, and returns
    the loadings.

    Parameters
    ----------
    observied_variables : pandas.DataFrame
        Observed variables (parameter name kept, typo and all, for
        backward compatibility with keyword callers).
    name : str
        Dataset identifier, either ``'phone'`` or ``'QOL'``.

    Returns
    -------
    pandas.DataFrame
        Factor loadings indexed by the input columns.

    Raises
    ------
    UnboundLocalError
        If *name* is neither ``'phone'`` nor ``'QOL'`` — no analyzer is
        ever created (pre-existing behavior, preserved).
    """
    # Adequacy tests: Bartlett's test of sphericity and Kaiser-Meyer-Olkin.
    from factor_analyzer.factor_analyzer import (calculate_bartlett_sphericity,
                                                 calculate_kmo)
    chi_square_value, p_value = calculate_bartlett_sphericity(observied_variables)
    print("chi_square_value", chi_square_value, "p-value:", p_value)
    kmo_all, kmo_model = calculate_kmo(observied_variables)
    print("KMO value", kmo_model)

    # Create factor analysis object and perform factor analysis.
    if name == 'phone':
        fa = FactorAnalyzer(n_factors=2)
    if name == 'QOL':
        fa = FactorAnalyzer(n_factors=4)
    fa.fit_transform(observied_variables)

    # Check eigenvalues.
    eigen_values, vectors = fa.get_eigenvalues()
    print(eigen_values)

    # Variance explained by each factor.
    # BUG FIX: this report originally sat AFTER the return statement and
    # was unreachable dead code; it now executes before returning.
    print(
        pd.DataFrame(fa.get_factor_variance(),
                     ['SS Loadings', 'Proportion Var', 'Cumulative Var']))

    loadings = fa.loadings_
    print(pd.DataFrame(loadings, observied_variables.columns))
    return pd.DataFrame(loadings, observied_variables.columns)
def factor_analysis(self):
    """Fit a factor-analysis model and write loadings/scores to TSV files.

    Configures a ``FactorAnalyzer`` from ``self.N_factor``,
    ``self.rotation`` and ``self.method``, fits it on ``self.dataset``,
    stores the loadings on ``self.loadings`` and writes two
    tab-separated files under ``self.outd``.

    Returns
    -------
    int
        Always 0 (kept for backward compatibility with callers that
        check the return code).
    """
    fa = FactorAnalyzer(n_factors=self.N_factor, rotation=self.rotation,
                        method=self.method)
    score = fa.fit_transform(self.dataset)
    header = ["Factor_%s" % i for i in range(1, self.N_factor + 1)]

    ### Factor loadings
    self.loadings = fa.loadings_
    outf = "%s/factor_loadings.tsv" % self.outd
    df = pd.DataFrame(fa.loadings_, columns=header)
    df.to_csv(outf, sep="\t", index=False)
    # BUG FIX: log messages previously misspelled "Factor" as "Facotr".
    self.logger.info("Factor loadings are saved as %s." % outf)

    ### Factor scores
    outf = "%s/factor_score.tsv" % self.outd
    df = pd.DataFrame(score, columns=header)
    df.to_csv(outf, sep="\t", index=False)
    self.logger.info("Factor scores are saved as %s." % outf)
    return 0
# NOTE(review): this chunk begins mid-statement — `step=1))` closes a
# tick-locator call whose opening line is outside the visible range.
step=1)) #change from 0-based array index to 1-based human-readable label
plt.ylabel('Cumulative variance (%)')
plt.title('The number of components needed to explain variance')
# NOTE(review): the cut-off line is drawn at y=0.95 while the axis label
# says percent — if the plotted values are in %, this should presumably
# be y=95. TODO confirm against the data being plotted.
plt.axhline(y=0.95, color='r', linestyle='-')
plt.text(0.5, 0.85, '95% cut-off threshold', color='red', fontsize=16)
ax.grid(axis='x')
plt.show()


# In[15]:

# Same PCA as above, but redone with another library (factor_analyzer).
from factor_analyzer import FactorAnalyzer

# Fit a 3-factor varimax-rotated model on X (X is defined outside this view).
transformer2 = FactorAnalyzer(n_factors=3, rotation='varimax')
X_transformed2 = transformer2.fit_transform(X)


# In[16]:

# Export the factor loadings, one row per variable in `datamat`.
pc2 = pd.DataFrame(transformer2.loadings_, index=datamat)
export_excel2 = pc2.to_excel('Downloads/pcvartotpais2021.xlsx', index=True, header=True)


# In[ ]:

# Export the components of the earlier sklearn-style PCA (`transformer`,
# fit outside this view) for comparison with the loadings above.
pcaex = pd.DataFrame(transformer.components_, columns=datamat)
export_excel2 = pcaex.to_excel('Downloads/pcaex.xlsx', index=True, header=True)


# In[57]:
# NOTE(review): the next six statements look like the body of a
# `showCoefficientPValues(y_train, X_train_transformed)` helper whose
# `def` line falls outside this view — confirm before moving them.
# Fit an OLS model (with intercept) purely to extract coefficient p-values.
X2 = sm.add_constant(X_train_transformed)
model = sm.OLS(y_train, X2)
fii = model.fit()
p_values = fii.summary2().tables[1]['P>|t|']
print("\nModel p-values: ")
print(p_values)


# Split data before it is transformed.
from sklearn.model_selection import train_test_split
# 70/30 split; `df` holds the features and data['medv'] the target
# (presumably the Boston-housing median value column — verify upstream).
X_train, X_test, y_train, y_test = train_test_split(df, data['medv'],
                                                    test_size=0.3,
                                                    random_state=1)

# Transform data with factor components.
# `fa` is a factor-analysis transformer constructed outside this view;
# fit on the training fold only, then applied to the test fold.
X_train_transformed = fa.fit_transform(X_train)
# NOTE(review): variable name is misspelled ("tranformed"); used
# consistently below, so renaming must touch every use at once.
X_test_tranformed = fa.transform(X_test)

#----------------------------------------------------------
# Build first model
#----------------------------------------------------------
# Train regression model on training data
model = LinearRegression()
model.fit(X_train_transformed, y_train)

# Show model statistics.
showModelSummary(model, y_test, X_test_tranformed)

# Check coefficient significance.
showCoefficientPValues(y_train, X_train_transformed)
# Correlation heatmap of df_corr (computed outside this view).
sns.heatmap(df_corr, annot=True)
plt.show()

#################################################################################################
'''Week 3'''
# Factor Analysis #
from factor_analyzer import FactorAnalyzer

# Two-factor model with oblique (promax) rotation.
fact = FactorAnalyzer(n_factors=2, rotation='promax')
# Join the mortgage features with the median-age table so all five
# analysis columns live in one frame (join keys come from the frames'
# shared columns — pd.merge default inner join on common columns).
df_cor = pd.merge(df_cor, df_med)
a_data = df_cor[[
    'hs_degree', 'median_age', 'second_mortgage', 'pct_own', 'bad_debt'
]]
fact.fit_transform(a_data)

# Scree plot: eigenvalues per factor, x-axis labelled 1..5.
ev, v = fact.get_eigenvalues()
plt.plot(ev)
plt.xticks(range(len(a_data.columns)), labels=['1', '2', '3', '4', '5'])
plt.show()

# Factor loadings per variable, same 1..5 x-axis labelling.
plt.plot(fact.loadings_)
plt.xticks(range(len(a_data.columns)), labels=['1', '2', '3', '4', '5'])
plt.show()

# NOTE(review): result is computed but neither stored nor printed —
# presumably meant to be printed or assigned; confirm intent.
fact.get_communalities()

##################################################################################################
'''Week 4'''
# Regression Analysis #