예제 #1
0
def calculate_py_output(test_name,
                        factors,
                        method,
                        rotation,
                        top_dir=None):
    """
    Run a factor analysis with `FactorAnalyzer()` on one test data set
    and gather its outputs into a dictionary.

    The data is read from the CSV file ``<top_dir>/<test_name>.csv``.

    Parameters
    ----------
    test_name : str
        The name of the test; also the base name of the CSV file.
    factors : int
        The number of factors.
    method : str
        The method handed to `FactorAnalyzer.analyze()` as ``method``.
        The value 'uls' is translated to 'minres'; anything else is
        passed through unchanged.
    rotation : str
        The type of rotation. The string 'none' is translated to
        ``None`` before being passed on.
    top_dir : str, optional
        The top directory for test data.
        Defaults to `DATA_DIR`.

    Returns
    -------
    output : dict
        A dictionary with the keys 'value', 'evalues', 'structure',
        'loading', 'uniquenesses', 'communalities' and 'scores'.
    """
    data_root = DATA_DIR if top_dir is None else top_dir
    csv_path = join(data_root, test_name + '.csv')
    frame = pd.read_csv(csv_path)

    # Translate the scenario's spelling of the options into the values
    # that `analyze()` is called with.
    if rotation == 'none':
        rotation = None
    method_aliases = {'uls': 'minres'}
    method = method_aliases.get(method, method)

    analyzer = FactorAnalyzer()
    analyzer.analyze(frame, factors, method=method, rotation=rotation)

    evalues, values = analyzer.get_eigenvalues()

    # Built step by step, in the same key order as the scenario outputs.
    output = {'value': values, 'evalues': evalues}
    output['structure'] = analyzer.structure
    output['loading'] = analyzer.loadings
    output['uniquenesses'] = analyzer.get_uniqueness()
    output['communalities'] = analyzer.get_communalities()
    output['scores'] = analyzer.get_scores(frame)
    return output
예제 #2
0
    def run(self, dfx, n_factors=3):
        """
        Run an unrotated factor analysis on the numeric columns of `dfx`.

        Columns are split by `ParseDFtypes` into numeric and non-numeric
        ones. Non-numeric columns are ignored (with a warning); if there
        are no numeric columns at all, an error is logged and an empty
        result is returned.

        Parameters
        ----------
        dfx : DataFrame
            The input data.
        n_factors : int, optional
            The number of factors. Stored on ``self.n_factors``.
            Defaults to 3.

        Returns
        -------
        dict
            On success: 'result' (loadings joined with communalities),
            'msg' (dict, possibly holding a 'warning' entry) and
            'factor' (the factor scores). The column labels are the
            Chinese strings for "factor k loading coefficient",
            "communality" and "factor k".
            When no numeric column exists: 'result' is an empty
            DataFrame, 'msg' holds an 'error' entry, and there is no
            'factor' key.
        """
        self.n_factors = n_factors
        msg = {}

        numeric_cols, other_cols = ParseDFtypes(dfx)

        # Guard clause: nothing to analyse without numeric columns.
        if numeric_cols == []:
            error_text = 'All input dfx are no numeric columns, Please check your input dfx data!'
            logging.error(error_text)
            msg['error'] = error_text
            return {'result': pd.DataFrame(), 'msg': msg}

        if other_cols != []:
            warning_text = 'input dfx has non-numeric columns: %s, will ignore these columns!' % other_cols
            logging.warning(warning_text)
            msg['warning'] = warning_text

        numeric_df = dfx[numeric_cols]

        analyzer = FactorAnalyzer()
        analyzer.analyze(numeric_df, n_factors, rotation=None)
        loadings = analyzer.loadings
        communalities = analyzer.get_communalities()
        scores = analyzer.get_scores(numeric_df)

        # Relabel the outputs with 1-based factor numbers.
        loadings.columns = ['因子%s荷载系数' % k for k in range(1, n_factors + 1)]
        communalities.columns = ['共同度']
        scores.columns = ['因子%s' % k for k in range(1, n_factors + 1)]

        return {
            'result': loadings.join(communalities),
            'msg': msg,
            'factor': scores,
        }
예제 #3
0
# Notebook-export fragment: `fa` and `df` are defined before this excerpt.
# Bare expressions below only display a value when run as notebook cells.

# Factor loadings held by the analyzer `fa`.
fa.loadings

# In[22]:

# Communalities reported by `fa`.
fa.get_communalities()

# In[23]:

# Factor variance information reported by `fa`.
fa.get_factor_variance()

# In[26]:

# Factor scores computed by `fa` for `df`.
Fac_score = fa.get_scores(df)

# In[27]:

# Preview the first rows of the factor scores.
print(Fac_score.head())

# # Cluster Analysis using factor scores

# In[28]:

# K-means clustering and silhouette metrics from scikit-learn.
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score

# In[82]:

# Candidate cluster counts.
# NOTE(review): presumably each value is tried in turn by later cells — confirm.
n_clusters = [5, 10, 15, 16, 17, 18, 19, 20, 25, 30]