コード例 #1
0
import matplotlib.pyplot as plt
import pandas as pd
import sklearn.metrics as skm
import statsmodels.discrete.discrete_model as sm_mod
import statsmodels.graphics.gofplots as plots
import statsmodels.tools.tools as smtools
from factor_analyzer import FactorAnalyzer
from factor_analyzer import (ConfirmatoryFactorAnalyzer, ModelSpecificationParser)
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
from factor_analyzer.utils import (corr, impute_values, partial_correlations, smc)
from sklearn.model_selection import train_test_split

# Exploratory factor analysis of the exposure data set.
# Requires `import pandas as pd` at the top of the file for `pd.read_csv`.

# Load the data directly from its public URL.
data1 = pd.read_csv("https://donatello-telesca.squarespace.com/s/Exposure-t4yx.csv")

# Perform factor analysis: extract six factors with a varimax rotation.
# Pass rotation=None instead to obtain the unrotated solution.
fa = FactorAnalyzer(n_factors=6, rotation='varimax')
fa.fit(data1)

# Check factors.
factor_loadings = fa.loadings_
# NOTE: get_eigenvalues() returns a pair of eigenvalue arrays (original and
# common-factor). Despite its name, `vectors` holds the second array of
# eigenvalues, not eigenvectors; the name is kept for backward compatibility.
eigen_values, vectors = fa.get_eigenvalues()
communalities = fa.get_communalities()

# Scree plot (left disabled, as in the original script). The x-range is
# derived from the number of eigenvalues rather than a hard-coded 1..28 so
# it stays valid if the number of columns in the data changes.
# factor_index = range(1, len(eigen_values) + 1)
# plt.scatter(factor_index, eigen_values)
# plt.plot(factor_index, eigen_values)
# plt.title('Scree Plot')
# plt.xlabel('Factors')
# plt.ylabel('Eigenvalue')
# plt.grid()
# plt.show()

def clump_factor_vars(factor_loadings,factor_num):
コード例 #2
0
# Factorability check and scree plot for an exploratory factor analysis.
# NOTE(review): `df` is not defined in this excerpt; presumably it is a pandas
# DataFrame loaded earlier in the original example -- confirm before running.
print(df.columns)

# Drop the columns that are excluded from the factor analysis.
df.drop(['gender', 'education', 'age'], axis=1, inplace=True)

# Remove rows that contain missing values.
df.dropna(inplace=True)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.head())

# Before performing factor analysis, evaluate the "factorability" of the
# data set with Bartlett's test of sphericity.
chi_square_value, p_value = calculate_bartlett_sphericity(df)
print(chi_square_value, p_value)

# Create the factor analysis object and fit it without rotation, requesting
# one factor per remaining column (25 for the data set this example was
# written for) instead of hard-coding the count.
n_variables = df.shape[1]
fa = FactorAnalyzer(n_factors=n_variables, rotation=None)
fa.fit(df)

# Check eigenvalues. `ev` is the first array returned by get_eigenvalues();
# the second array, `v`, is not used below.
ev, v = fa.get_eigenvalues()
print(ev)

# According to the original example, only six eigenvalues are greater than
# one for its data set, which suggests retaining six factors (unobserved
# variables).

# Create the scree plot with matplotlib: eigenvalue against factor number.
factor_index = range(1, n_variables + 1)
plt.scatter(factor_index, ev)
plt.plot(factor_index, ev)
plt.title('Scree Plot')
plt.xlabel('Factors')
plt.ylabel('Eigenvalue')
plt.grid()
plt.show()