예제 #1
0
def calculate_py_output(test_name,
                        factors,
                        method,
                        rotation,
                        top_dir=None):
    """
    Run a factor analysis with `FactorAnalyzer()` for one test scenario
    and collect the results in a dictionary.

    Parameters
    ----------
    test_name : str
        The name of the test; the input data is read from
        `<test_name>.csv` inside `top_dir`.
    factors : int
        The number of factors
    method : str
        The value passed as `method` to `FactorAnalyzer.analyze()`.
        'uls' is translated to 'minres'; any other value is passed
        through unchanged.
    rotation : str
        The type of rotation. The string 'none' is translated to
        `None` before being passed on.
    top_dir : str, optional
        The top directory for test data
        Defaults to `DATA_DIR`

    Returns
    -------
    output : dict
        A dictionary with the keys 'value', 'evalues', 'structure',
        'loading', 'uniquenesses', 'communalities' and 'scores'.
        'evalues' and 'value' hold the first and second item returned
        by `get_eigenvalues()`, respectively.
    """
    data_dir = DATA_DIR if top_dir is None else top_dir
    frame = pd.read_csv(join(data_dir, test_name + '.csv'))

    # The scenario files spell "no rotation" as the string 'none'.
    if rotation == 'none':
        rotation = None

    # Map scenario method names onto the names the analyzer is called with.
    method_aliases = {'uls': 'minres'}
    method = method_aliases.get(method, method)

    analyzer = FactorAnalyzer()
    analyzer.analyze(frame, factors, method=method, rotation=rotation)

    e_vals, vals = analyzer.get_eigenvalues()

    # Fill the result in the same order the individual outputs are queried.
    output = {}
    output['value'] = vals
    output['evalues'] = e_vals
    output['structure'] = analyzer.structure
    output['loading'] = analyzer.loadings
    output['uniquenesses'] = analyzer.get_uniqueness()
    output['communalities'] = analyzer.get_communalities()
    output['scores'] = analyzer.get_scores(frame)
    return output
# In[20]:

# Create a FactorAnalyzer with its default constructor arguments.
from factor_analyzer import FactorAnalyzer
fa = FactorAnalyzer()

# In[28]:

# Run the analysis on `train`, requesting 3 factors and no rotation.
# NOTE(review): `train` is not defined in this section — presumably a
# DataFrame prepared in an earlier cell; confirm.
fa.analyze(train, 3, rotation=None)

# In[29]:

# Bare expression: shown as the cell output in a notebook, has no effect
# when the file is run as a plain script.
fa.loadings

# In[30]:

# Bare expression (cell output), same as above.
fa.get_uniqueness()
# Author's note: select the feature with the highest uniqueness value;
# the author treats that feature as the most important variable.
# NOTE(review): uniqueness is usually the share of a variable's variance
# that the common factors do NOT explain — confirm this selection rule.

# ## Principal Component Analysis

# In[31]:

# Project `df` onto 4 principal components and wrap the result in a
# DataFrame with one named column per component.
# NOTE(review): `df` and `pd` are not defined in this section — presumably
# set up in earlier cells; confirm.
from sklearn.decomposition import PCA
pca = PCA(n_components=4)
principalComponents = pca.fit_transform(df)
principalDf = pd.DataFrame(data=principalComponents,
                           columns=[
                               'principal component 1',
                               'principal component 2',
                               'principal component 3', 'principal component 4'
                           ])
예제 #3
0
                      [57, 46, 54, 46, 42], [38, 42, 41, 36, 41],
                      [43, 47, 41, 53, 44], [45, 51, 53, 46, 53],
                      [49, 56, 54, 61, 51], [35, 38, 57, 65, 57]])
# Wrap the raw score array in a DataFrame labelled with the subject names
# (`seiseki_a` and `subject` are defined above this section).
seiseki_in = pd.DataFrame(seiseki_a, columns=subject)
# Pass the data through `scale` and keep the original column labels.
# NOTE(review): `scale` is presumably sklearn.preprocessing.scale
# (column-wise standardization) — its import is not visible here; confirm.
seiseki = pd.DataFrame(scale(seiseki_in), columns=seiseki_in.columns.values)

# Run a 2-factor analysis with varimax rotation.
fa = FactorAnalyzer()
fa.analyze(seiseki, 2, rotation="varimax")
# Alternative rotation settings kept for experimentation:
#fa.analyze(seiseki, 2, rotation="promax")
#fa.analyze(seiseki, 2, rotation=None)

# Report section. The printed labels are Japanese runtime strings:
# '相関行列' = correlation matrix, '因子負荷量' = factor loadings,
# '独自性' = uniqueness, '因子分散' = factor variance.
# Each bare print() emits a blank separator line.
print('相関行列\n', seiseki.corr(method='pearson'))
print()
print('因子負荷量', fa.loadings.round(4))  # loadings
print()
print('独自性', fa.get_uniqueness().round(4))  # uniqueness
print()
print('因子分散', fa.get_factor_variance().round(4))
print()

##################
# Contribution ratio of each factor: the sum of the squared loadings in
# that factor's column divided by the number of rows in `fa.loadings`.
# Summing column-wise (axis=0) sizes the result from the loadings table
# itself, so it works for any number of factors instead of assuming two.
kiyo = np.square(fa.loadings.values).sum(axis=0)
# One-row frame whose columns carry the factor labels of `fa.loadings`.
kiyo = pd.DataFrame(kiyo / len(fa.loadings),
                    index=fa.loadings.columns.values).T
kiyo = kiyo.append(pd.DataFrame(np.cumsum(kiyo, axis=1)),
                   ignore_index=True).rename({
                       0: '寄与率',