Example #1
    def get_ev(self, X):
        num_features = len(X.columns)
        fa = FactorAnalyzer(num_features, rotation=None, method=self.method)
        fa.fit(X)

        ev, v = fa.get_eigenvalues()
        return ev
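For reference, FactorAnalyzer.get_eigenvalues() returns two arrays: the eigenvalues of the original correlation matrix and the common-factor eigenvalues. A minimal standalone sketch of the pattern above (synthetic data; names are illustrative):

# Standalone sketch (synthetic data; names are illustrative):
import numpy as np
import pandas as pd
from factor_analyzer import FactorAnalyzer

X = pd.DataFrame(np.random.rand(100, 6), columns=list('abcdef'))
fa = FactorAnalyzer(len(X.columns), rotation=None)
fa.fit(X)
ev, v = fa.get_eigenvalues()  # ev: original eigenvalues, v: common-factor eigenvalues
print(ev)                     # one eigenvalue per column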
Example #2
def find_number_of_Factors_1(eigenval_limit, dimensions, obs, kind, prnt):

    """Calculates the number of factors with an eigenvalue greater than 'eigenval_limit'
    (variant without the param trial_index).
        :param   eigenval_limit: number (float), recommended = 1.0
                 dimensions:     dimensions before dimensionality reduction (obs.shape[1])
                 obs:            2-dim array holding the averaged data
                 kind:           0 if data is averaged, 1 if data is single trial, 2 if data is concatenated
                 prnt:           if True, print a summary of the result
        :return: the number of factors generating the data with eigenvalues greater than eigenval_limit
        """

    fa = FactorAnalyzer(bounds=(0.005, 1), impute='median', is_corr_matrix=False,
                        method='minres', n_factors=dimensions, rotation=None, rotation_kwargs={},
                        use_smc=True)

    fa.fit(obs)
    eigenvals, x = fa.get_eigenvalues()

    # count the eigenvals >= eigenval_limit --> number of them = number of relevant factors
    num_FA_dim = len(eigenvals[eigenvals >= eigenval_limit])

    if prnt:

        if kind == 0:
            print('averaged:')
            print('Number of Factors:                           ', num_FA_dim)


        elif kind == 2:
            print('concatenated:')
            print('Number of Factors:                           ', num_FA_dim)

    return num_FA_dim
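A possible usage sketch for the function above (synthetic data, illustrative only):

# Usage sketch (synthetic data):
import numpy as np
obs = np.random.rand(200, 12)  # (time steps, dimensions)
n = find_number_of_Factors_1(eigenval_limit=1.0, dimensions=obs.shape[1],
                             obs=obs, kind=0, prnt=True)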
Example #3
 def scatter_2d(self) -> go.Figure:
     """ 2D scatter plot for clustered data """
     fa = FactorAnalyzer(rotation='varimax', n_factors=2, method='ml')
     components = fa.fit_transform(self.df)
     total_var = self.pro_var.sum() * 100
     return self._plot_scatter_2d(components, self.clustered_labels.cluster,
                                  total_var)
Example #4
def get_fa_loads(d_phens,
                 kmo_threshold=0.6,
                 bartlett_threshold=0.05,
                 n_shuffle=100,
                 test_factorability=False):
    """
    Get factors
    :param d_phens:
    :param loading_thresh:
    :param kmo_threshold:
    :param bartlett_threshold:
    :param n_shuffle:
    :param test_factorability:
    :return:
    """

    # Evaluation of the “factorability” of phenotypes
    if test_factorability:
        _, bartlett_value = calculate_bartlett_sphericity(d_phens)
        _, kmo_model = calculate_kmo(d_phens)
        if (kmo_model < kmo_threshold) or (bartlett_value > bartlett_threshold):
            # raise ValueError('Phenotypic data does not contain factors')
            warnings.warn('\nPhenotypic data does not contain factors')
            return None

    # Define the number of factors by parallel analysis
    n_factors = pa(d_phens, n_shuffle)

    # factor analysis
    fa = FactorAnalyzer(n_factors=n_factors)
    fa.fit(d_phens)

    loads = pd.DataFrame(data=fa.loadings_, index=d_phens.columns)

    return loads
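The parallel-analysis helper pa() is not shown in this snippet; a minimal sketch of what such a helper might look like (a hypothetical reconstruction, not the original implementation):

# Hypothetical sketch of a parallel-analysis helper; d_phens is assumed
# to be a pandas DataFrame, as in get_fa_loads above.
import numpy as np
from factor_analyzer import FactorAnalyzer

def pa(d_phens, n_shuffle=100, quantile=95):
    fa = FactorAnalyzer(n_factors=1, rotation=None)
    fa.fit(d_phens)
    ev_obs, _ = fa.get_eigenvalues()  # observed eigenvalues

    # Eigenvalues of column-wise shuffled data (breaks the correlations)
    rng = np.random.default_rng(0)
    ev_rand = np.empty((n_shuffle, d_phens.shape[1]))
    for i in range(n_shuffle):
        shuffled = np.column_stack([rng.permutation(d_phens.iloc[:, j].values)
                                    for j in range(d_phens.shape[1])])
        fa.fit(shuffled)
        ev_rand[i], _ = fa.get_eigenvalues()

    # Keep factors whose eigenvalue beats the chosen quantile of the null
    threshold = np.percentile(ev_rand, quantile, axis=0)
    return int(np.sum(ev_obs > threshold))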
Example #5
    def get_loadings(self,
                     by: CUTOFF_METHOD = "scree",
                     threshold: float = 1,
                     n_factors: Optional[int] = None,
                     is_filter: bool = False) -> pd.DataFrame:
        """
        Get PCA loading dataframe

        :param by:
            Cutoff method using Cummulative Variance Plot or Scree Plot
        :param threshold:
            Percentage of variance explained, default 80%
        :param n_factors:
            Number of factors.
        :param is_filter:
            If False will show all PCA loadings heatmap. If True, will only show attributes with cells > 0.55.
        """
        df = self.df
        _factors = self._get_factors(
            by, threshold) if n_factors is None else n_factors
        fa = FactorAnalyzer(rotation='varimax',
                            n_factors=_factors,
                            method='ml')
        fa.fit(df)
        fa_loading_matrix = pd.DataFrame(
            fa.loadings_, columns=[f'FA{i}' for i in range(1, _factors + 1)])
        return self._process_loading_matrix(fa_loading_matrix, is_filter)
Example #6
def eigenvalues_plt(data):
    img = io.BytesIO()

    plt.switch_backend('Agg')

    plt.style.use('ggplot')
    fa = FactorAnalyzer()
    fa.fit(data)
    eigen_values, vectors = fa.get_eigenvalues()
    plt.figure(figsize=(10, 10))
    plt.scatter(range(1, data.shape[1] + 1), eigen_values)
    plt.plot(range(1, data.shape[1] + 1), eigen_values)
    plt.title('Factor Importance by Eigenvalues')

    plt.xlabel('Factors')
    plt.ylabel('Eigenvalue')
    plt.grid()

    plt.savefig(img, format='png')

    img.seek(0)
    graph_url = base64.b64encode(img.getvalue()).decode()
    plt.close()

    return 'data:image/png;base64,{}'.format(graph_url)
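A usage sketch for embedding the returned data URI (hypothetical web context):

# Hypothetical usage, e.g. in a Flask view returning HTML:
# plot_uri = eigenvalues_plt(df)
# return f'<img src="{plot_uri}" alt="scree plot"/>'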
Example #7
def plotfig(cols):
    c = df1.corr()
    xa = df1[df1.columns[2:7]]
    fa = FactorAnalyzer()
    fa.fit(xa)  # get eigenvalues and plot them
    ev, v = fa.get_eigenvalues()
    #plt.plot(range(1,xa.shape[1]+1),ev)
    fig = px.scatter(x=range(1, xa.shape[1] + 1), y=ev)
    fig.update_traces(mode='lines+markers')

    fig.update_layout(yaxis={'visible': True, 'showticklabels': True})
    fig.update_layout(xaxis={'visible': True, 'showticklabels': True})
    fig.update_layout(width=700, height=200, plot_bgcolor='rgb(255,255,255)')
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#dddddd')
    fig.update_xaxes(showline=True, linewidth=1, linecolor='black')
    fig.update_yaxes(showline=True, linewidth=1, linecolor='black')

    fig['layout'].update(margin=dict(l=0, r=20, b=20, t=10))
    fig.update_traces(line=dict(color="#0863ae"))
    fig.update_layout(xaxis_title="X",
                      yaxis_title="Y",
                      legend_title="Factor Analysis",
                      font=dict(family="Courier New, monospace",
                                size=12,
                                color="black"))
    return fig
Example #8
def numbFactorsTest(X, m=1, met='ml', alfa=0.05):  #met='principal','minres'
    n, p = X.shape
    R = np.corrcoef(np.transpose(X))
    p_val = 0

    fa = FactorAnalyzer(method=met,
                        rotation='varimax',
                        n_factors=m,
                        is_corr_matrix=False)
    fa.fit(X)
    l = fa.loadings_
    ll = l @ l.T
    fi = np.diag(R) - np.diag(ll)
    Sg = ll + np.diag(fi)

    # Upper bound on the number of factors for which the test is defined
    l_max = 1 / 2 * (2 * p + 1 - (8 * p + 1)**0.5)
    if m < l_max:
        df = (((p - m)**2) - (p + m)) * 1 / 2
        vt = (n - 1 - (2 * p + 4 * m + 5) / 6) * np.log(
            np.linalg.det(Sg) / np.linalg.det(R))
        vc = stats.chi2.ppf(1 - alfa, df)
        p_val = stats.chi2.sf(vt, df)  # p-value of the test statistic
        if vt > vc:  # reject H0
            H0 = False
        else:
            H0 = True
    else:
        H0 = False

    cumVar = fa.get_factor_variance()[2][-1]
    return (H0, p_val, cumVar)
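A possible usage sketch: increase m until the m-factor model is no longer rejected (synthetic data, illustrative only):

# Usage sketch (synthetic data):
import numpy as np
X = np.random.randn(300, 8)
for m in range(1, 4):
    H0, p_val, cumVar = numbFactorsTest(X, m=m)
    print(m, H0, round(cumVar, 3))
    if H0:  # m factors are sufficient
        break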


Example #9
def factors_lst(number_factors, lst_obs, prnt):
    """
    Does the factor analysis/dimensionality reduction for a list of observations with a loop over that list
    :param number_factors:  the number of factors to be taken (the reduced dimensionality); has to be the same for
                            all the list elements (integer)
    :param lst_obs:         list, elements hold the dF/F for the Mouse/sessions/trial type of one specific trial
    :param prnt:            if True, print the shape of the first transformed element
    :return:                list, elements hold the transformed observations; the shape of the elements is now
                            (time steps, number of factors)
    """

    lst_obs_transformed = []

    for item in lst_obs:

        fa = FactorAnalyzer(bounds=(0.005, 1), impute='median', is_corr_matrix=False,
                            method='minres', n_factors=number_factors, rotation=None, rotation_kwargs={},
                            use_smc=True)

        fa.fit(item)
        obs_transformed = fa.transform(item)

        lst_obs_transformed.append(obs_transformed)

    if prnt:

        print()
        print('shape of one element of the list')
        print('after the dim reduction:                  ', np.shape(lst_obs_transformed[0]))
        print('number of time steps:                     ', lst_obs_transformed[0].shape[0])
        print('number of dimensions:                     ', lst_obs_transformed[0].shape[1])

    return lst_obs_transformed
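A possible usage sketch (synthetic data, illustrative only):

# Usage sketch (synthetic data):
import numpy as np
lst_obs = [np.random.rand(150, 20) for _ in range(3)]  # (time steps, neurons)
lst_red = factors_lst(number_factors=5, lst_obs=lst_obs, prnt=True)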
Example #10
def def_factor_analysis(X, k, rotation_=None):
    model = FactorAnalyzer(n_factors=k, rotation=rotation_).fit(X)

    eigen = model.get_eigenvalues()
    l = model.loadings_
    v = model.get_factor_variance()

    return eigen, l, v
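The return values unpack as follows (a usage sketch; X stands for any numeric 2-D dataset and is an assumption here):

# Usage sketch: eigen is the (original, common-factor) eigenvalue pair,
# v is the (SS loadings, proportional variance, cumulative variance) triple
eigen, l, v = def_factor_analysis(X, k=3, rotation_='varimax')
ev_orig, ev_common = eigen
ss_loadings, prop_var, cum_var = v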
Example #11
    def fit(self, X, y=None):
        ev = self.get_ev(X)
        self.weighted_ev = ev[:self.num_factors] / sum(ev[:self.num_factors])

        self.fa = FactorAnalyzer(self.num_factors, self.rotation, self.method)
        self.fa.fit(X)

        return self
Example #12
 def FA(self):
     fa = FactorAnalyzer(n_factors=1, method="principal", rotation="varimax")
     fa.fit(self.df)
     # Print eigenvalues
     ev, v = fa.get_eigenvalues()
     print(ev)
     # Print loadings
     print(fa.loadings_)
     self.coeff = fa.loadings_
     return 0
Example #13
 def _get_variance_info(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """
     Return a Tuple consisting of 3 arrays:
     1. Sum of squared loadings (variance)
     2. Proportional variance
     3. Cumulative variance
     """
     fa = FactorAnalyzer(rotation=None)
     fa.fit(self.df.dropna())
     return fa.get_factor_variance()
Example #14
File: lb_v2.py Project: ikem55/HRsystem
    def factory_analyze_raceuma_result_df(self, race_df, input_raceuma_df,
                                          dict_folder):
        """ RaceUmaの因子分析を行うためのデータを取得 """
        print("factory_analyze_raceuma_result_df")
        temp_df = pd.merge(input_raceuma_df, race_df, on="競走コード")
        X = temp_df[[
            '競走コード', '馬番', '枠番', 'タイム指数', '単勝オッズ', '先行率', 'ペース偏差値', '距離増減',
            '斤量比', '追込率', '平均タイム', "距離", "頭数", "非根幹", "上り係数", "逃げ勝ち", "内勝ち",
            "外勝ち", "短縮勝ち", "延長勝ち", "人気勝ち", "1番人気", "3角先頭", "4角先頭", "上がり最速",
            "上がりタイム", "連闘", "休み明け", "大差負け", "展開脚質", "展開脚色"
        ]]

        mmsc_columns = ["頭数", "展開脚質", "展開脚色", "上がりタイム"]
        mmsc_dict_name = "sc_fa_race_mmsc"
        stdsc_columns = ["距離"]
        stdsc_dict_name = "sc_fa_race_stdsc"
        X = mu.scale_df_for_fa(X, mmsc_columns, mmsc_dict_name, stdsc_columns,
                               stdsc_dict_name, dict_folder)

        X_fact = X.drop(["競走コード", "馬番"], axis=1).astype({
            '非根幹': int,
            '逃げ勝ち': int,
            '内勝ち': int,
            '外勝ち': int,
            '短縮勝ち': int,
            '延長勝ち': int,
            '人気勝ち': int,
            '1番人気': int,
            '3角先頭': int,
            '4角先頭': int,
            '上がり最速': int,
            '休み明け': int,
            '連闘': int,
            '大差負け': int
        })

        X_fact = X_fact.replace(np.inf,
                                np.nan).fillna(X_fact.median()).fillna(0)
        X_fact.iloc[0] = X_fact.iloc[0] + 0.000001

        dict_name = "fa_raceuma_result_df"
        filename = dict_folder + dict_name + '.pkl'
        if os.path.exists(filename):
            fa = mu.load_dict(dict_name, dict_folder)
        else:
            fa = FactorAnalyzer(n_factors=5, rotation='promax', impute='drop')
            fa.fit(X_fact)
            mu.save_dict(fa, dict_name, dict_folder)

        fa_np = fa.transform(X_fact)
        fa_df = pd.DataFrame(fa_np,
                             columns=["fa_1", "fa_2", "fa_3", "fa_4", "fa_5"])
        fa_df = pd.concat([X[["競走コード", "馬番"]], fa_df], axis=1)
        X_fact = pd.merge(input_raceuma_df, fa_df, on=["競走コード", "馬番"])
        return X_fact
Example #15
    def factor_analysis_perdomain(self, write=False):
        '''
        Function to perform factor analysis per domain
        Args: 
            write: Boolean variable to choose whether to write the output to a file or not. 
        Returns:
            None
        '''
        # load domains into domain data
        domain_data = self.load_domains()

        # Get mapping of original numbers to shuffled numbers
        true_num = list(pd.read_csv(self.data, index_col=0))
        for d in self.domains:
            curr = domain_data[d]
            col = list(curr)
            ind = []
            for c in col:
                ind.append(true_num.index(c))
            index_num = list(map(int, ind))
            column_name = list(np.array(self.extract_features())[index_num])

            inp = domain_data[d]
            fa = FactorAnalyzer(n_factors=self.n, rotation='varimax')

            # In some cases, factor analysis does not succeed
            try:
                fa.fit(inp)
            except Exception:
                print(
                    'Data from ' + str(d) +
                    ' domain cannot be factorized as it results in a singular matrix.'
                )
                continue
            magnitude = fa.get_communalities()

            mag_dict = {}
            for i, _ in enumerate(column_name):
                mag_dict[column_name[i]] = magnitude[i]

            sorted_mag = sorted(mag_dict.items(),
                                key=lambda kv: kv[1],
                                reverse=True)

            if write == True:
                factors = pd.DataFrame(sorted_mag,
                                       columns=['Feature', 'Importance'])
                if self.DC:
                    factors.to_csv('output/fa_decorrelated_' + str(d) + '_' +
                                   str(self.VER) + '.csv',
                                   index=False)
                else:
                    factors.to_csv('output/fa_' + str(d) + '_' +
                                   str(self.VER) + '.csv',
                                   index=False)
Example #16
 def fit(self):
     """
     按所选方法进行变换
     :return: 变换完毕的所有向量
     """
     feed_data = self.data[self.selected_column]
     self.model = FactorAnalyzer(n_factors=feed_data.shape[1],
                                 method=self.method,
                                 rotation=None)
     self.model.fit(feed_data)
     return self.model.transform(feed_data)
Example #17
def get_s(e_square):

    fa = FactorAnalyzer(n_factors=1, rotation="varimax")
    fa.fit(e_square)
    loadings = fa.loadings_

    tmp = (loadings - min(loadings)) / (max(loadings) - min(loadings))
    s = tmp / sum(tmp**2)
    s_prime = s / np.linalg.norm(s)

    return s_prime
Example #18
def factor_analyzer(n_factors):
    fa = FactorAnalyzer(n_factors=n_factors, rotation=None)
    fa_fit_out = fa.fit(cars_ar)
    fa_communalities = fa_fit_out.get_communalities()
    fa_gof = sum(fa_communalities)
    fa_scores = fa_fit_out.transform(cars_ar)
    fa_factor_loadings = fa_fit_out.loadings_
    return {
        'fa_gof': fa_gof,
        'fa_communalities': fa_communalities,
        'fa_scores': fa_scores,
        'fa_factor_loadings': fa_factor_loadings
    }
Example #19
def best_num_factors(df):
    fa = FactorAnalyzer(bounds=(0.005, 1),
                        impute='median',
                        is_corr_matrix=False,
                        method='minres',
                        n_factors=10,
                        rotation=None,
                        rotation_kwargs={},
                        use_smc=True)
    fa.fit(df)
    ev, v = fa.get_eigenvalues()
    num_f = len([e for e in ev if e > ev.mean() + 2 * ev.std()])
    return num_f
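A usage sketch chaining the estimate into a final fit (illustrative):

# Usage sketch:
# n = best_num_factors(df)
# fa = FactorAnalyzer(n_factors=n, rotation='varimax').fit(df)
# print(fa.loadings_)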
Example #20
def show_num_factors(df):
    fa = FactorAnalyzer(bounds=(0.005, 1),
                        impute='median',
                        is_corr_matrix=False,
                        method='minres',
                        n_factors=10,
                        rotation='varimax',
                        rotation_kwargs={},
                        use_smc=True)
    fa.fit(df)
    ev, v = fa.get_eigenvalues()
    num_f = len([e for e in ev if e > ev.mean() + 2 * ev.std()])
    res_f = len([e for e in ev if e > 1])
    return f"Best number of factors: {num_f}. Other possible factors {res_f-num_f}"
Example #21
def make_loadings_matrix(rating_m):
    '''Takes a rating matrix and returns the loading matrix. Optimized for the number of components
    using the knee, with an oblimin rotation for interpretability
    '''
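    # Assumed imports, not shown in this snippet: `from factor_analyzer
    # import FactorAnalyzer` and `from kneed import KneeLocator`.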
    # Fit the initial factor analysis
    fa = FactorAnalyzer(n_factors=10, rotation='oblimin')
    fa.fit(rating_m)
    x = list(range(1, 16))
    fa_eigens = fa.get_eigenvalues()[1]
    fa_matrix_knee = KneeLocator(x,
                                 fa_eigens,
                                 S=1.0,
                                 curve='convex',
                                 direction='decreasing')
    fa_knee = fa_matrix_knee.knee
    fa_kneed = FactorAnalyzer(n_factors=fa_knee,
                              rotation='varimax').fit(rating_m)
    loadings_m = pd.DataFrame(fa_kneed.loadings_.round(2))
    loadings_m.index = get_construct_names()
    loadings_m.index = loadings_m.index.rename(name='Construct')
    loadings_m.columns = [
        'Factor {} ({:.0f}%)'.format(
            i + 1,
            fa_kneed.get_factor_variance()[1][i] * 100)
        for i in loadings_m.columns
    ]
    return loadings_m
Example #22
    def _get_factor_df(self, n_factors: int) -> pd.DataFrame:
        """
        Dimension reduced pandas Dataframe from original dataframe

        :param n_factors:
            Number of factors
        """
        fa = FactorAnalyzer(rotation='varimax',
                            n_factors=n_factors,
                            method='ml')
        # fit() returns the fitted analyzer, not the scores; use fit_transform()
        scores = fa.fit_transform(self.df)
        return pd.DataFrame(
            data=scores,
            columns=[f'factor {i}' for i in range(1, n_factors + 1)])
Example #23
 def FactorAnalyze(self, rotate="varimax"):
     self.SharedVariance = copy.deepcopy(self.NormSubThresh)
     self.SharedVariance = self.SharedVariance.iloc[:1]
     self.SharedVariance.index.rename("Shared Variance", inplace=True)
     self.FactorLoadings = copy.deepcopy(self.NormSubThresh)
     self.FactorLoadings = self.FactorLoadings.iloc[:3]
     self.FactorLoadings.index.rename("Factor Number", inplace=True)
     for key in self.SharedVariance.columns.unique(level=0):
         factor = FactorAnalyzer(n_factors=3, rotation=rotate)
         factor.fit(sklearn.preprocessing.StandardScaler().fit_transform(
             self.NormSubThresh[key].values))
         self.SharedVariance[key] = np.atleast_2d(
             factor.get_communalities())
         self.FactorLoadings[key] = factor.loadings_.T
Example #24
def fac_an(df, n_factors, name):
    drop_nn(df)
    fa = FactorAnalyzer(bounds=(0.005, 1),
                        impute='median',
                        is_corr_matrix=False,
                        method='minres',
                        n_factors=n_factors,
                        rotation='varimax',
                        rotation_kwargs={},
                        use_smc=True)
    fa.fit(df)
    load = pd.DataFrame.from_records(
        fa.loadings_,
        columns=([f'{name}' + str(i + 1) for i in range(n_factors)]))
    load['item#'] = df.columns
    return load
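A possible usage sketch (illustrative names):

# Usage sketch: loadings table for a 3-factor varimax solution
# load = fac_an(survey_df, n_factors=3, name='F')
# -> columns F1, F2, F3 plus 'item#' holding the original column names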
Example #25
 def factor_analysis(self):
     fa = FactorAnalyzer(n_factors=self.N_factor,
                         rotation=self.rotation,
                         method=self.method)
     score = fa.fit_transform(self.dataset)
     header = ["Factor_%s" % i for i in range(1, self.N_factor + 1)]
     ### Factor loadings
     self.loadings = fa.loadings_
     outf = "%s/factor_loadings.tsv" % self.outd
     df = pd.DataFrame(fa.loadings_, columns=header)
     df.to_csv(outf, sep="\t", index=False)
     self.logger.info("Facotr loadings are saved as %s." % outf)
     ### 因子得点
     outf = "%s/factor_score.tsv" % self.outd
     df = pd.DataFrame(score, columns=header)
     df.to_csv(outf, sep="\t", index=False)
     self.logger.info("Facotr scores are saved as %s." % outf)
     return 0
Example #26
def get_MFA_params(zl, kl, rl_nextl):
    ''' Determine clusters with a GMM and then fit a Factor Model on each cluster
    zl (ndarray): The lth layer latent variable
    kl (int): The number of components of the lth layer
    rl_nextl (1darray): The dimensions of the lth layer and the (l+1)th layer
    -----------------------------------------------------
    returns (dict): Dict with the parameters of the MFA approximated by GMM + FA.
    '''
    #======================================================
    # Fit a GMM in the continuous space
    #======================================================
    numobs = zl.shape[0]

    not_all_groups = True
    max_trials = 100
    empty_count_counter = 0

    while not_all_groups:
        # If there are not enough obs per group, the MFA diverges...

        gmm = GaussianMixture(n_components=kl)
        s = gmm.fit_predict(zl)

        clusters_found, count = np.unique(s, return_counts=True)

        if (len(clusters_found) == kl):  # & (count >= 5).all():
            not_all_groups = False

        empty_count_counter += 1
        if empty_count_counter >= max_trials:
            raise RuntimeError('Could not find a GMM init that presents '
                               'the proper number of groups: {}'.format(kl))

    psi = np.full((kl, rl_nextl[0], rl_nextl[0]), 0).astype(float)
    psi_inv = np.full((kl, rl_nextl[0], rl_nextl[0]), 0).astype(float)
    H = np.full((kl, rl_nextl[0], rl_nextl[1]), 0).astype(float)
    eta = np.full((kl, rl_nextl[0]), 0).astype(float)
    z_nextl = np.full((numobs, rl_nextl[1]), np.nan).astype(float)

    #========================================================
    # And then a MFA on each of those group
    #========================================================

    for j in range(kl):
        indices = (s == j)
        fa = FactorAnalyzer(rotation=None, method='ml', n_factors=rl_nextl[1])
        fa.fit(zl[indices])

        psi[j] = np.diag(fa.get_uniquenesses())
        H[j] = fa.loadings_
        psi_inv[j] = np.diag(1 / fa.get_uniquenesses())
        z_nextl[indices] = fa.transform(zl[indices])

        eta[j] = np.mean(zl[indices], axis=0)

    params = {'H': H, 'psi': psi, 'z_nextl': z_nextl, 'eta': eta, 'classes': s}
    return params
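A usage sketch on synthetic data (illustrative; assumes the imports used above, e.g. GaussianMixture from sklearn.mixture):

# Usage sketch (synthetic data):
import numpy as np
zl = np.random.randn(500, 4)  # latent variable of layer l
params = get_MFA_params(zl, kl=3, rl_nextl=[4, 2])
print(params['H'].shape)        # (3, 4, 2): one loading matrix per cluster
print(params['z_nextl'].shape)  # (500, 2): next-layer latent variable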
Example #27
def loadThem(rotation, factors):
    import numpy as np

    fa = FactorAnalyzer(rotation=rotation, n_factors=factors)
    fa = fa.fit(df.values)
    loadings = fa.loadings_

    # Visualize factor loadings
    Z = np.abs(fa.loadings_)
    fig, ax = plt.subplots()
    c = ax.pcolor(Z)
    fig.colorbar(c, ax=ax)
    ax.set_yticks(np.arange(fa.loadings_.shape[0]) + 0.5, minor=False)
    ax.set_xticks(np.arange(fa.loadings_.shape[1]) + 0.5, minor=False)
    ax.set_title(rotation)
    plt.show()

    vari = fa.get_factor_variance()

    return loadings, vari
Example #28
def fn_biplot_rev(data, col_ind_1, col_ind_2, xlim_lb, xlim_ub, ylim_lb, ylim_ub, labels=None):
    # function
    datavalues = data.copy()
    #     datavalues = data.values

    col_1 = col_ind_1 - 1
    col_2 = col_ind_2 - 1

    xs = datavalues[:, col_1]
    ys = datavalues[:, col_2]
    n = datavalues.shape[1]
    xs_count = len(datavalues)

    scalex = 1.0 / (xs.max() - xs.min())
    scaley = 1.0 / (ys.max() - ys.min())
    plt.scatter(xs, ys, color='r')

    ts = []
    for i in range(xs_count):
        ts.append(plt.text(xs[i], ys[i], 'Q' + str(i + 1)))

    plt.xlim(xlim_lb, xlim_ub)
    plt.ylim(ylim_lb, ylim_ub)

    plt.xlabel("FA{}".format(col_ind_1))
    plt.ylabel("FA{}".format(col_ind_2))
    plt.grid()


# plot (script-level usage; ML_result_2_fa and data_df are defined elsewhere)
fn_biplot_rev(ML_result_2_fa, col_ind_1=1, col_ind_2=2, xlim_lb=-1, xlim_ub=1, ylim_lb=-1, ylim_ub=1)
plt.show()

# orthogonal rotation: setting rotation='varimax' gives an orthogonal rotation
fa = FactorAnalyzer(n_factors=2, rotation='varimax', method='ml')
fa.fit(data_df)
ML_varimax_2_fa = fa.loadings_
ML_varimax_2_fa

# plot
ML_varimax_2_fa_mat = ML_varimax_2_fa
fn_biplot_rev(ML_varimax_2_fa_mat, col_ind_1=1, col_ind_2=2, xlim_lb=-1, xlim_ub=1, ylim_lb=-1, ylim_ub=1)
plt.show()

# oblique rotation: promax
fa = FactorAnalyzer(n_factors=2, rotation='promax', method='ml')
fa.fit(data_df)
ML_promax_2_fa = fa.loadings_
ML_promax_2_fa

# plot
fn_biplot_rev(ML_promax_2_fa, col_ind_1=1, col_ind_2=2, xlim_lb=-1, xlim_ub=1, ylim_lb=-1, ylim_ub=1)
plt.show()
Example #29
def calculate_py_output(test_name,
                        factors,
                        method,
                        rotation,
                        top_dir=None):
    """
    Use the `FactorAnalyzer()` class to perform the factor analysis
    and return a dictionary with relevant output for given scenario.

    Parameters
    ----------
    test_name : str
        The name of the test
    factors : int
        The number of factors
    method : str
        The factor extraction method
    rotation : str
        The type of rotation
    top_dir : str, optional
        The top directory for test data.
        Defaults to `DATA_DIR`.

    Returns
    -------
    output : dict
        A dictionary containing the outputs
        for all `OUTPUT_TYPES`.
    """
    if top_dir is None:
        top_dir = DATA_DIR

    filename = join(top_dir, test_name + '.csv')
    data = pd.read_csv(filename)

    rotation = None if rotation == 'none' else rotation
    method = {'uls': 'minres'}.get(method, method)

    fa = FactorAnalyzer()
    fa.analyze(data, factors, method=method, rotation=rotation)

    evalues, values = fa.get_eigenvalues()

    return {'value': values,
            'evalues': evalues,
            'structure': fa.structure,
            'loading': fa.loadings,
            'uniquenesses': fa.get_uniqueness(),
            'communalities': fa.get_communalities(),
            'scores': fa.get_scores(data)}
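Note that this snippet targets the pre-0.3.0 factor_analyzer API (analyze(), loadings, get_scores()); from 0.3.0 on the class is scikit-learn style. A rough sketch of the equivalent fitting step in the newer API (a sketch, not the original code):

# Sketch of the scikit-learn-style API (factor_analyzer >= 0.3.0):
# fa = FactorAnalyzer(n_factors=factors, method=method, rotation=rotation)
# fa.fit(data)
# loadings = fa.loadings_               # was fa.loadings
# scores = fa.transform(data)           # was fa.get_scores(data)
# uniquenesses = fa.get_uniquenesses()  # was fa.get_uniqueness()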
Example #30
    def factor_analysis(self, input_x):
        ss_x = StandardScaler().fit_transform(input_x)
        norm_x = normalize(input_x, axis=0)  # computed but not used below
        factor_number = 9
        fa = FactorAnalyzer(
            n_factors=factor_number,
            rotation='oblimin')  # oblique: oblimin/promax; orthogonal: varimax
        fa.fit(ss_x)
        ev, v = fa.get_eigenvalues()
        factor_loading_matrix = fa.loadings_
        fa_score = fa.transform(ss_x)
        print('ev', ev)
        # print('v',v)
        # print('factor_loading_matrix',factor_loading_matrix)
        fa_name = list(self.table_data.columns[1::])
        # print('quantization_score', len(fa_name),fa_name)
        for i in range(factor_number):
            all_coefficients = np.sort(factor_loading_matrix[:, i])
            coefficients_index = np.argsort(factor_loading_matrix[:, i])
            print('factor_i', i)
            for j, coefficient in enumerate(all_coefficients):
                if coefficient > 0.5:
                    print('coefficients_index', coefficients_index[j],
                          fa_name[coefficients_index[j]])

        plt.scatter(range(1, input_x.shape[1] + 1), ev)
        plt.plot(range(1, input_x.shape[1] + 1), ev)
        plt.title('scree plot')
        plt.ylabel('eigenvalues')
        plt.grid()
        plt.show()

        return fa_score