Example #1
import pandas as pd
from factor_analyzer import FactorAnalyzer
from kneed import KneeLocator


def make_loadings_matrix(rating_m):
    '''Take a rating matrix and return the loadings matrix. The number of
    factors is chosen at the knee of the eigenvalue curve, and the final fit
    uses a varimax rotation for interpretability.
    '''
    # Fit the initial factor analysis
    fa = FactorAnalyzer(n_factors=10, rotation='oblimin')
    fa.fit(rating_m)
    fa_eigens = fa.get_eigenvalues()[1]
    x = list(range(1, len(fa_eigens) + 1))
    fa_matrix_knee = KneeLocator(x,
                                 fa_eigens,
                                 S=1.0,
                                 curve='convex',
                                 direction='decreasing')
    fa_knee = fa_matrix_knee.knee
    fa_kneed = FactorAnalyzer(n_factors=fa_knee,
                              rotation='varimax').fit(rating_m)
    loadings_m = pd.DataFrame(fa_kneed.loadings_.round(2))
    loadings_m.index = get_construct_names()
    loadings_m.index = loadings_m.index.rename(name='Construct')
    loadings_m.columns = [
        'Factor {} ({:.0f}%)'.format(
            i + 1,
            fa_kneed.get_factor_variance()[1][i] * 100)
        for i in loadings_m.columns
    ]
    return loadings_m
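
For a quick sanity check of the knee-based selection used above, here is a minimal sketch with a made-up eigenvalue curve (kneed's KneeLocator is the real API; the numbers are hypothetical):

from kneed import KneeLocator

eigens = [5.2, 2.9, 1.4, 0.7, 0.5, 0.4, 0.35, 0.3]  # hypothetical scree curve
x = list(range(1, len(eigens) + 1))
kl = KneeLocator(x, eigens, S=1.0, curve='convex', direction='decreasing')
print(kl.knee)  # knee point, used as n_factors above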
Example #2
import numpy as np
from scipy import stats
from factor_analyzer import FactorAnalyzer


def numbFactorsTest(X, m=1, met='ml', alfa=0.05):  # met: 'ml', 'principal', or 'minres'
    n, p = X.shape
    R = np.corrcoef(np.transpose(X))
    p_val = 0

    fa = FactorAnalyzer(method=met,
                        rotation='varimax',
                        n_factors=m,
                        is_corr_matrix=False)
    fa.fit(X)
    l = fa.loadings_
    ll = l @ l.T
    fi = np.diag(R) - np.diag(ll)  # uniquenesses
    Sg = ll + np.diag(fi)  # model-implied (reproduced) correlation matrix

    # Ledermann bound on the identifiable number of factors
    ledermann = 1 / 2 * (2 * p + 1 - (8 * p + 1)**0.5)
    if m < ledermann:
        df = (((p - m)**2) - (p + m)) * 1 / 2
        vt = (n - 1 - (2 * p + 4 * m + 5) / 6) * np.log(
            np.linalg.det(Sg) / np.linalg.det(R))
        vc = stats.chi2.ppf(1 - alfa, df)
        p_val = stats.chi2.sf(vt, df)  # p-value of the test statistic
        if vt > vc:  # reject H0
            H0 = False
        else:
            H0 = True
    else:
        H0 = False

    cumVar = fa.get_factor_variance()[2][-1]
    return (H0, p_val, cumVar)
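
A quick smoke test of the hypothesis test above, on hypothetical synthetic data with a known two-factor structure (a sketch; assumes the imports added above):

rng = np.random.default_rng(0)
latent = rng.normal(size=(500, 2))                          # two hypothetical factors
W = rng.normal(size=(2, 8))                                 # hypothetical loading matrix
X_demo = latent @ W + rng.normal(scale=0.5, size=(500, 8))

H0, p_val, cumVar = numbFactorsTest(X_demo, m=2)
print(H0, p_val, cumVar)  # H0 True would mean 2 factors are sufficient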


#%%
Example #3
def def_factor_analysis(X, k, rotation_=None):
    model = FactorAnalyzer(n_factors=k, rotation=rotation_).fit(X)

    eigen = model.get_eigenvalues()
    l = model.loadings_
    v = model.get_factor_variance()

    return eigen, l, v
Example #4
    def _get_variance_info(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Return a Tuple consisting of 3 arrays:
        1. Sum of squared loadings (variance)
        2. Proportional variance
        3. Cumulative variance
        """
        fa = FactorAnalyzer(rotation=None)
        fa.fit(self.df.dropna())
        return fa.get_factor_variance()
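
As a reference for what `get_factor_variance()` returns, a minimal sketch on made-up data (the tuple unpacking reflects factor_analyzer's real API; the DataFrame is hypothetical):

import numpy as np
import pandas as pd
from factor_analyzer import FactorAnalyzer

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(200, 6)))  # hypothetical data

fa = FactorAnalyzer(rotation=None)
fa.fit(df)
ss_loadings, prop_var, cum_var = fa.get_factor_variance()
print(cum_var[-1])  # total variance explained by the retained factors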
Example #5
import numpy as np
import matplotlib.pyplot as plt
from factor_analyzer import FactorAnalyzer


def loadThem(rotation, factors):
    fa = FactorAnalyzer(rotation=rotation, n_factors=factors)
    fa = fa.fit(df.values)  # df is assumed to be a module-level DataFrame
    loadings = fa.loadings_

    # Visualize the absolute factor loadings as a heatmap
    Z = np.abs(fa.loadings_)
    fig, ax = plt.subplots()
    c = ax.pcolor(Z)
    fig.colorbar(c, ax=ax)
    ax.set_yticks(np.arange(fa.loadings_.shape[0]) + 0.5, minor=False)
    ax.set_xticks(np.arange(fa.loadings_.shape[1]) + 0.5, minor=False)
    ax.set_title(rotation)
    plt.show()

    vari = fa.get_factor_variance()

    return loadings, vari
Example #6
def FA(observed_variables, name):
    from factor_analyzer.factor_analyzer import (calculate_bartlett_sphericity,
                                                 calculate_kmo)

    # Bartlett's test of sphericity: is the correlation matrix an identity matrix?
    chi_square_value, p_value = calculate_bartlett_sphericity(
        observed_variables)
    print("chi_square_value", chi_square_value, "p-value:", p_value)
    # Kaiser-Meyer-Olkin measure of sampling adequacy
    kmo_all, kmo_model = calculate_kmo(observed_variables)
    print("KMO value", kmo_model)

    # Create the factor analysis object and perform factor analysis
    if name == 'phone':
        fa = FactorAnalyzer(n_factors=2)
    elif name == 'QOL':
        fa = FactorAnalyzer(n_factors=4)
    else:
        raise ValueError("name must be 'phone' or 'QOL'")
    fa.fit_transform(observed_variables)
    # Check Eigenvalues
    eigen_values, vectors = fa.get_eigenvalues()
    print(eigen_values)
    """
    # Create scree plot using matplotlib
    plt.scatter(range(1,observed_variables.shape[1]+1),eigen_values)
    plt.plot(range(1,observed_variables.shape[1]+1),eigen_values)
    if name == 'phone':
        plt.title('Scree Plot for phone features',fontsize=24)
    if name == 'QOL':
        plt.title('Scree Plot for QOL features',fontsize=24)
    plt.xlabel('Factors', fontsize=18)
    plt.ylabel('Eigenvalue',fontsize=18)
    plt.grid()
    plt.show()
    """

    loadings = fa.loadings_
    print(pd.DataFrame(loadings, observed_variables.columns))
    #print(pd.DataFrame(fa.get_communalities()))

    # Get the variance of each factor
    print(
        pd.DataFrame(fa.get_factor_variance(),
                     ['SS Loadings', 'Proportion Var', 'Cumulative Var']))
    return pd.DataFrame(loadings, observed_variables.columns)
Example #7
    def factor_analysis(self, *x_columns: str, n_factor: int = None) -> dict:
        """Factor analysis.

        :param x_columns: names of the columns holding the x factors
        :param n_factor: number of common factors (may be set manually; defaults to automatic)
        :return: dict containing the communalities, the component matrix, and the total variance explained
        """
        columns = list(x_columns)
        X_data = pd.DataFrame(self.data, columns=columns)
        if n_factor is not None:
            fa = FactorAnalyzer(method="principal", n_factors=n_factor)
        else:
            fa = FactorAnalyzer(method="principal")
        fa.fit(X_data)
        result_dict = dict()
        result_dict['communalities'] = fa.get_communalities().tolist()
        result_dict['component_matrix'] = fa.loadings_.tolist()
        result_dict['factor_variance'] = [arr.tolist() for arr in fa.get_factor_variance()]
        return result_dict
Example #8
    def fit(self, n_factors=3, rotation='varimax'):
        '''
        Parameters
        ----------
        n_factors : int, optional, (default:3)
        \t The number of factors to select
        
        rotation : str, optional, (default:'varimax')
        \t The type of rotation to perform after fitting 
        \t the factor analysis model
        
        \t Rotation Methods
        \t (a) varimax : orthogonal rotation
        \t (b) promax  : oblique rotation
        \t (c) oblimin : oblique rotation
        \t (d) oblimax : orthogonal rotation
        \t (e) quartimin : oblique rotation
        \t (f) quartimax : orthogonal rotation
        \t (g) equamax : orthogonal rotation
        
        Returns
        -------
        self.variance : array of floats
        \t The factor variance information,
        \t including variance, proportional variance, and
        \t cumulative variance for each factor

        self.loadings_ : array of floats, 
        of shape (n_features, n_factors)
        \t The factor loadings matrix
        '''
        self.n_factors, self.rotation = n_factors, rotation
        kwargs = dict(n_factors=n_factors,
                      rotation=rotation,
                      is_corr_matrix=True)
        fa = FactorAnalyzer(**kwargs)
        fa.fit(self.loadings)
        self.variance = fa.get_factor_variance()
        self.loadings_ = fa.loadings_
Example #9
def factor_analysis(org, repo):
    # https://www.datacamp.com/community/tutorials/introduction-factor-analysis
    # https://www.theanalysisfactor.com/the-fundamental-difference-between-principal-component-analysis-and-factor-analysis/
    # https://factor-analyzer.readthedocs.io/en/latest/factor_analyzer.html#module-factor_analyzer.factor_analyzer
    # https://towardsdatascience.com/factor-analysis-101-31710b7cadff
    issues1 = c.get_issues_with_response_time(org, repo, False)
    issues2 = c.get_issues_with_processing_time(org, repo, False)
    issues = pd.merge(issues1,
                      issues2[["number", "processing_time", "closed_at"]],
                      how="left",
                      on="number")
    issues = issues[[
        "company", "processing_time", "response_time", "priority"
    ]]

    issues.dropna(
        subset=["processing_time", "response_time", "priority", "company"],
        inplace=True)
    issues["company"] = issues["company"].replace({
        'Google': 5,
        'RedHat': 4,
        'Microsoft': 3,
        'VMware': 2,
        'Huawei': 2,
        'ZTE': 1
    })
    issues["priority"] = issues["priority"].astype(float)
    issues["company"] = issues["company"].astype(float)
    issues.info()

    fa = FactorAnalyzer(rotation='varimax', n_factors=3)
    fa.fit(issues)
    print(fa.loadings_)
    print(fa.get_factor_variance())
Example #10
                      [49, 56, 54, 61, 51], [35, 38, 57, 65, 57]])
seiseki_in = pd.DataFrame(seiseki_a, columns=subject)
seiseki = pd.DataFrame(scale(seiseki_in), columns=seiseki_in.columns.values)

fa = FactorAnalyzer()
fa.analyze(seiseki, 2, rotation="varimax")
#fa.analyze(seiseki, 2, rotation="promax")
#fa.analyze(seiseki, 2, rotation=None)

print('Correlation matrix\n', seiseki.corr(method='pearson'))
print()
print('Factor loadings', fa.loadings.round(4))
print()
print('Uniquenesses', fa.get_uniqueness().round(4))
print()
print('Factor variances', fa.get_factor_variance().round(4))
print()

##################
# Contribution ratios (proportion of variance explained)
kiyo = np.array([0, 0])
for i in range(len(fa.loadings)):
    u = np.array(fa.loadings.iloc[i])
    kiyo = kiyo + u * u
kiyo = pd.DataFrame(kiyo / len(fa.loadings),
                    index=fa.loadings.columns.values).T
kiyo = kiyo.append(pd.DataFrame(np.cumsum(kiyo, axis=1)),
                   ignore_index=True).rename({
                       0: 'contribution ratio',
                       1: 'cumulative contribution ratio'
                   })
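
The loop above just averages squared loadings per factor; under the same pre-0.3 factor_analyzer API (where fa.loadings is a variables-by-factors DataFrame), a vectorized sketch of the equivalent computation:

# Column sums of squared loadings, divided by the number of variables
kiyo_vec = (fa.loadings ** 2).sum(axis=0) / len(fa.loadings)
kiyo_cum = kiyo_vec.cumsum()  # cumulative contribution ratio
print(kiyo_vec.round(4))
print(kiyo_cum.round(4))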
Example #11
loadings = pd.DataFrame(Ypca.loadings_,
                        index=cbs.test_names(),
                        columns=pca_names)

# Pairwise correlations between test scores
var_corrs = pd.DataFrame(Ypca.corr_,
                         index=cbs.test_names(),
                         columns=cbs.test_names())

# Eigenvalues of the components
eigen_values = pd.DataFrame(Ypca.get_eigenvalues()[0][0:3],
                            index=pca_names,
                            columns=['eigenvalues']).T

# Percentage variance explained by each component
pct_variance = pd.DataFrame(Ypca.get_factor_variance()[1] * 100,
                            index=pca_names,
                            columns=['% variance']).T

# Generates and displays the chord plot to visualize the factors
fig = chord_plot(loadings.copy(),
                 var_corrs.copy(),
                 cscale_name='Picnic',
                 width=700,
                 height=350,
                 threshold=0.20)

save_and_display_figure(fig, 'Figure_1A')

#%%
# Generate a table of task to composite score loadings
fa.loadings


# In[29]:


fa = FactorAnalyzer()
fa.analyze(df, 4, rotation="varimax")
fa.loadings


# In[30]:


fa.get_factor_variance()


# In[31]:


fa = FactorAnalyzer()
fa.analyze(df, 5, rotation="varimax")
fa.loadings


# In[32]:


fa.get_factor_variance()
ev, v = fa.get_eigenvalues()

# TODO @abhi18av make this better
# Only with 5 factors are all eigenvalues greater than or close to one, so we choose 5 factors (unobserved variables).
ev
print_ln()

# v
# print_ln()

# plt.scatter(range(1,eff.shape[1]+1),ev)
# plt.plot(range(1,eff.shape[1]+1),ev)
# plt.title('Scree Plot')
# plt.xlabel('Factors')
# plt.ylabel('Eigenvalue')
# plt.grid()
# plt.show()

print_ln()

eff_factor_variance = fa.get_factor_variance()

eff = eff.dropna(thresh=3)

eff

eff.to_csv("eff_data_py.csv")

eff = eff.apply(lambda x: x.fillna(x.median()), axis=0)
eff
Example #14
#print(components_SVD)
scores_spectral = pd.DataFrame(index=['TA_F', 'PA_F', 'LW_IN_F' , 'VPD_F', 'SW_IN_F', 'CO2_F_MDS', 
             'WS_F', 'LE_F_MDS', 'H_F_MDS', 'RH', 'USTAR'],
                          data = np.transpose(components_spectral),
                          columns=['PC{}'.format(i+1) for i in range(components_spectral.shape[1])])
print(scores_spectral.head(11))

fa = FactorAnalyzer(n_factors=12, rotation="varimax")
fa.fit(temp)
loadings_df = pd.DataFrame(index=['TA_F', 'PA_F', 'LW_IN_F' , 'VPD_F', 'SW_IN_F', 'CO2_F_MDS', 
             'WS_F', 'LE_F_MDS', 'H_F_MDS', 'RH', 'USTAR'],
                        data = fa.loadings_,
                        columns=['F{}'.format(i+1) for i in range(fa.loadings_.shape[1])])
print(loadings_df.head(11))
variances = np.array(fa.get_factor_variance()[0])  # SS loadings per factor
print(variances/sum(variances))
plt.figure(figsize=(12,7))
plt.plot(np.cumsum(variances/sum(variances)), linewidth=3.0)
plt.show()

#comp_variance, components = sclearn_PCA(temp.values)
#print(components)

#for i in range(len(comp_variance)):
#    print('Described variance: %1.6F' % (float(comp_variance[i]) / float(comp_variance.sum())))
#    print(comp_variance[i], '\n')
#print(components[0])
#print(components[:, 0])
#
#print(domain_specific_approach(0.1, comp_variance, components))
Example #15
def factor_analysis(factor_df, max_feature_count=None, plot=True):
    """
    Factor analysis: extract N factors and check whether the result is usable.
    :param factor_df: DataFrame of observed variables
    :param max_feature_count: upper bound on the number of factors to try
    :param plot: whether to plot the diagnostics per factor count
    :return: dict of diagnostics keyed by the number of factors
    """
    ana_dic = {}
    if max_feature_count is None:
        max_feature_count = min(factor_df.shape[1] // 3, 50)
    for n_features in range(2, max_feature_count):
        logger.info(f"{n_features} 个因子时:")
        fa = FactorAnalyzer(n_factors=n_features, rotation=None)
        exception = None
        for _ in range(8, -1, -1):
            # Use the full frame on the final attempt (_ == 0); otherwise
            # resample _/(_+1) of the rows and retry.
            df = factor_df if _ == 0 else factor_df.sample(
                factor_df.shape[0] // (_ + 1) * _)
            try:
                fa.fit(df)
                break
            except LinAlgError as exp:
                exception = exp
                logger.exception("Matrix %s is singular; resampling %d/(%d+1) of the rows and retrying",
                                 df.shape, _, _)
        else:
            raise exception

        communalities = fa.get_communalities()
        logger.info(f"\tCommunalities ({communalities.shape})")
        # logger.debug('\n%s', communalities)
        loadings = fa.loadings_
        logger.info(f"\tComponent matrix, i.e. factor loadings ({loadings.shape})")
        # logger.debug('\n%s', loadings)
        var = fa.get_factor_variance()  # contribution ratios
        # 1. Sum of squared loadings (variance)
        # 2. Proportional variance
        # 3. Cumulative variance
        logger.info(f"\tCumulative variance {var[2]}")
        kmo_per_variable, kmo_total = calculate_kmo(fa.transform(factor_df))
        if kmo_total < 0.6:
            logger.info(f'\t× -> kmo_total={kmo_total:.5f}: weak inter-variable correlation, not suitable for factor analysis')
        else:
            logger.info(
                f'\t√ -> kmo_total={kmo_total:.5f}: strong inter-variable correlation, suitable for factor analysis')
        ana_dic[n_features] = {
            "FactorAnalyzer": fa,
            # "communalities": communalities,
            # "loadings": loadings,
            # "Sum of squared loadings": var[0],
            # "Proportional variance": var[1],
            "Cumulative variance": var[2][-1],
            "KOM_Test_total": kmo_total,
        }
        if var[2][-1] > 0.95 and kmo_total > 0.6:
            break

    ana_data = pd.DataFrame({
        k: {kk: vv for kk, vv in v.items() if kk != 'FactorAnalyzer'}
        for k, v in ana_dic.items()
    }).T
    if plot:
        ana_data.plot(subplots=True, figsize=(9, 6))
        plt.show()

    return ana_dic
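
A minimal smoke test of this helper on hypothetical synthetic data (a sketch; assumes numpy, pandas, factor_analyzer, and a configured logger are available in the module):

import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
latent = rng.normal(size=(300, 3))          # hypothetical latent factors
W = rng.normal(size=(3, 12))                # hypothetical loading matrix
factor_df = pd.DataFrame(latent @ W + rng.normal(scale=0.5, size=(300, 12)))

ana = factor_analysis(factor_df, max_feature_count=6, plot=False)
for n, info in ana.items():
    print(n, info["Cumulative variance"], info["KOM_Test_total"])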
Example #16
# data_new is the preprocessed stock data
# print(data_new)


pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


# Build the model
fa = FactorAnalyzer(rotation='varimax', n_factors=12)  # fix the number of common factors at 12
fa.fit(data_new)
print("Communalities:\n", fa.get_communalities())
matrix_orth = fa.loadings_
print("\nComponent matrix\n", matrix_orth)
var = fa.get_factor_variance()  # contribution ratios
print("\nTotal variance explained (contribution ratios):\n", var)
# Round each to 4 decimal places
print("\nEigenvalues (SS loadings):\n", list(map(lambda x: round(x, 4), var[0])))
print("\nFactor contribution ratios:\n", list(map(lambda x: round(x, 4), var[1])))
print("\nCumulative contribution ratios:\n", list(map(lambda x: round(x, 4), var[2])))

# Set DataFrame display options: max rows, max columns, no line wrapping
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 10)
pd.set_option('expand_frame_repr', False)
# Convert the data to a DataFrame
data22 = pd.DataFrame(data)
# Get the column names of the DataFrame
columns_name = data22.columns
# Pick out the corresponding stocks according to the factor analysis
Example #17
def FactorAnalysis(df, rotation="varimax", n_factors=10, transform=False):

    """ Use "varimax" rotation for orthogonal (highly differentiated) factors with very high and low variable loadings; this is the common choice.
        Use "oblimin" for non-orthogonal (oblique) loadings. It increases eigenvalues but reduces interpretability.
        Use "promax" if you want an oblimin-style rotation on large datasets.

        See https://stats.idre.ucla.edu/spss/output/factor-analysis/ for a fuller explanation.
    """

    assert not df.isnull().values.any(), "Data must not contain any nan or inf values"
    assert all(df.std().values > 0), "Columns used in Factor Analysis must have a non-zero Std. Dev. (aka more than a single value)"  

    def data_suitable(df, kmo_value=False, ignore=False):

        # Bartlett's sphericity test: ensure the data is not an identity matrix
        chi_square_value, p_value = calculate_bartlett_sphericity(df)

        # KMO test: the observed data must be adequate for FA (score > 0.6)
        kmo_all, kmo_model = calculate_kmo(df)

        if (p_value > 0.1 or kmo_model < 0.6) and not ignore:
            raise Exception("Data is not suitable for Factor Analysis! Identity test p-value: {}. KMO model score: {}".format(p_value, kmo_model))

        if kmo_value:
            return kmo_model
        return
        
        
    print("KMO Value: {}.".format(data_suitable(df, kmo_value = True)))

    fa = FactorAnalyzer(method = "minres", 
                        rotation = rotation,
                        n_factors = n_factors)

    fa.fit(df)

    def eigenplot(df):
        df = pd.DataFrame(df)
        
        fig = go.Figure()
        
        fig.add_trace(
            go.Scatter(
                x = df.index.values,
                y = df[0].values,
                mode = 'lines'
            )
        )
        
        
        fig.add_shape(
            type = "line",
            y0 = 1,
            x0 = 0,
            y1 = 1,
            x1 = len(df),
            line = dict(
                color = 'red',
                dash = 'dash'
            )
        )
        
        fig.update_layout(
            title = "Factor Eigenvalues",
            yaxis_title="Eigenvalue",
            xaxis_title="Factor",
            xaxis = dict(
                range = [0,df[df[0] > 0].index.values[-1]]
                )
        )
        
        fig.show()
        return

    eigenplot(fa.get_eigenvalues()[1])
    Plotting.LabeledHeatmap(fa.loadings_, y = list(df.columns), title = "Factor Loading", expand = True, height = 2000, width = 2000)

    tmp = pd.DataFrame(fa.get_factor_variance()[1:])
    tmp.index = ["Proportional Variance", "Cumulative Variance"]
    Plotting.dfTable(tmp)

    if rotation == 'promax':
        Plotting.LabeledHeatmap(fa.phi_, title = "Factor Correlation", expand = True, height = 2000, width = 2000)
        Plotting.LabeledHeatmap(fa.structure_, y = list(df.columns), title = "Variable-Factor Correlation", expand = True, height = 2000, width = 2000)

    Plotting.LabeledHeatmap(pd.DataFrame(fa.get_communalities()).T, 
                            title = "Variance Explained",
                            x = list(df.columns), 
                            description = "The proportion of each variable's variance that can be explained by the factors.", 
                            expand = True, 
                            height = 300, 
                            width = 2000)

    Plotting.LabeledHeatmap(pd.DataFrame(fa.get_uniquenesses()).T, 
                            title = "Variable Uniqueness",
                            x = list(df.columns),
                            expand = True, 
                            height = 300,
                            width = 2000)

    if transform:
        return fa.transform(df)

    return 
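
The suitability checks used above come straight from factor_analyzer; a self-contained sketch on made-up data (calculate_bartlett_sphericity and calculate_kmo are the real APIs; the data is hypothetical):

import numpy as np
import pandas as pd
from factor_analyzer.factor_analyzer import (calculate_bartlett_sphericity,
                                             calculate_kmo)

rng = np.random.default_rng(1)
base = rng.normal(size=(500, 1))
# Correlated columns, so the tests have structure to detect
df = pd.DataFrame(base + rng.normal(scale=0.7, size=(500, 8)))

chi2, p = calculate_bartlett_sphericity(df)  # H0: correlation matrix is identity
kmo_all, kmo_model = calculate_kmo(df)       # adequacy; want > 0.6
print("Bartlett p = {:.3g}, KMO = {:.3f}".format(p, kmo_model))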
Example #18
plt.scatter(range(1, data.shape[1] + 1), autovalores)
plt.plot(range(1, data.shape[1] + 1), autovalores)
plt.title('Scree Plot')
plt.xlabel('Factors')
plt.ylabel('Eigenvalue')
plt.grid()
plt.show()

# Create a factor analysis object with varimax rotation
analisador_varimax = FactorAnalyzer(n_factors=5, rotation="varimax")
analisador_varimax.fit(data)

autovalores_varimax, v = analisador_varimax.get_eigenvalues()
print(autovalores_varimax)
# Here we can see that the cumulative variance reaches 42% with 5 factors
print(analisador_varimax.get_factor_variance())

# Create a factor analysis object with quartimax rotation
analisador_quartimax = FactorAnalyzer(n_factors=5, rotation="quartimax")
analisador_quartimax.fit(data)

autovalores_quartimax, v = analisador_quartimax.get_eigenvalues()
print(autovalores_quartimax)
# Here we can see that the cumulative variance reaches 42% with 5 factors
print(analisador_quartimax.get_factor_variance())

# Create a factor analysis object with promax rotation
analisador_promax = FactorAnalyzer(n_factors=5, rotation="promax")
analisador_promax.fit(data)

autovalores_promax, v = analisador_promax.get_eigenvalues()
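
To compare the three rotations directly, one can tabulate the cumulative variance each reaches (a sketch continuing the snippet above, after all three models are fitted):

for name, model in [('varimax', analisador_varimax),
                    ('quartimax', analisador_quartimax),
                    ('promax', analisador_promax)]:
    print(name, round(model.get_factor_variance()[2][-1], 4))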
Example #19
# Check Eigenvalues
ev, v = fa.get_eigenvalues()
plt.scatter(range(1, datas.shape[1]+1), ev)
plt.plot(range(1, datas.shape[1]+1), ev)
plt.title('Scree Plot')
plt.xlabel('Factors')
plt.ylabel('Eigenvalue')
plt.grid()
plt.show()

fa = FactorAnalyzer(n_factors=2, method='principal', rotation='varimax')
fa.fit(datas)
# Communalities
print(fa.get_communalities())
# Eigenvalues (SS loadings)
print("\nEigenvalues:\n", fa.get_factor_variance()[0])
# Variance contribution ratios
print("\nVariance contribution ratios:\n", fa.get_factor_variance()[1])
# Cumulative variance contribution ratios
print("\nCumulative variance contribution ratios:\n", fa.get_factor_variance()[2])

print("\nComponent matrix:\n", fa.loadings_)
rotator = Rotator()
load_matrix = rotator.fit_transform(fa.loadings_)
print(load_matrix)

# Factor score coefficient matrix
# Correlation matrix
corr = datas.corr()
# Convert the array to a matrix
corr = np.mat(corr)
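
The snippet stops before computing the score coefficients themselves; under the regression method they are B = R^-1 Λ, which a short sketch can finish (using corr and the rotated load_matrix from above):

# Factor score coefficients via the regression method: solve R @ B = loadings
score_coef = np.linalg.solve(np.asarray(corr), load_matrix)
print(score_coef)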
Example #20
plt.xlabel('Factors')
plt.ylabel('Eigenvalue')
plt.grid()
plt.show()

figure = g.get_figure()
figure.savefig('Scree_plot.pdf', dpi=400)

# Performing Factor Analysis
# Create factor analysis object and perform factor analysis
fa = FactorAnalyzer(rotation='varimax', n_factors=30)
fa.fit(X)
a = fa.loadings_

# Get variance of each factor
factorVar = fa.get_factor_variance()
factorVar = np.asarray(factorVar)
print(factorVar[1].sum())
# --> a total of 60 % of the variance is explained by the 30 factors

# Make factor plot with named legend; does not work yet
#FA = FactorAnalysis(n_components = 30).fit_transform(X.values)
#a = pd.DataFrame(FA)
#newNames = list(VarbList[0:30])
#oldNames = list(a.columns[0:30])
#
#rename = {i:j for i,j in zip(oldNames,newNames)}
#a.rename(columns = rename, inplace = True)
#
#plt.figure(figsize=(12,8))
#plt.title('Factor Analysis Components')
Example #21
time.sleep(3)


#Performing factor analysis
print('-'*100)
print('Eigen values')
print('-'*100)
fa = FactorAnalyzer()
fa.analyze(data, rotation="varimax")
# Check Eigenvalues
ev, v = fa.get_eigenvalues()
print(ev)
print('-'*100)
print(fa.loadings)
print('-'*100)
print(fa.get_factor_variance())

time.sleep(6)

# Wrap long column names onto multiple lines for plot labels
l = [c.replace(' ', '\n') for c in data.columns]
data.columns = l

#create a scree plot using matplotlib
plt.figure(figsize=(8,6))