Example #1
    def test_analyze_infinite(self):

        # data frame in which column 'B' contains an infinite value
        data = pd.DataFrame({
            'A': [2, 4, 5, 6, 8, 9],
            'B': [4, 8, float('inf'), 10, 16, 18],
            'C': [6, 12, 15, 12, 26, 27]
        })

        # run the analysis with a single factor, using impute='drop'
        fa = FactorAnalyzer()
        fa.analyze(data, 1, impute='drop')
Example #2
    def test_analyze_rotation_value_error(self):

        # data frame containing a missing value in column 'B'
        data = pd.DataFrame({
            'A': [2, 4, 5, 6, 8, 9],
            'B': [4, 8, np.nan, 10, 16, 18],
            'C': [6, 12, 15, 12, 26, 27]
        })

        # 'blah' is not a valid rotation; as the test name indicates,
        # this call is expected to raise a ValueError
        fa = FactorAnalyzer()
        fa.analyze(data, 1, rotation='blah')
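The snippet above passes an unrecognized rotation name straight to analyze(). The excerpt does not show how the failure is asserted (the original test presumably relies on an expected-exception decorator), so below is a minimal, self-contained sketch of the same check using pytest.raises; the test name test_invalid_rotation_raises is hypothetical.

import numpy as np
import pandas as pd
import pytest

from factor_analyzer import FactorAnalyzer


def test_invalid_rotation_raises():
    # same toy data as in the example above
    data = pd.DataFrame({'A': [2, 4, 5, 6, 8, 9],
                         'B': [4, 8, np.nan, 10, 16, 18],
                         'C': [6, 12, 15, 12, 26, 27]})

    fa = FactorAnalyzer()

    # 'blah' is not a recognized rotation, so analyze() is expected
    # to raise a ValueError (assumption based on the test name above)
    with pytest.raises(ValueError):
        fa.analyze(data, 1, rotation='blah')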
Example #3
    def test_analyze_impute_drop(self):

        # data frame containing a missing value in column 'B'
        data = pd.DataFrame({
            'A': [2, 4, 5, 6, 8, 9],
            'B': [4, 8, np.nan, 10, 16, 18],
            'C': [6, 12, 15, 12, 26, 27]
        })

        # expected correlation matrix: drop the incomplete row first
        expected = data.copy()
        expected = expected.dropna()
        expected_corr = expected.corr()

        # with impute='drop', the analyzer's correlation matrix should
        # match the correlation of the complete cases only
        fa = FactorAnalyzer()
        fa.analyze(data, 1, rotation=None, impute='drop')
        assert_frame_equal(fa.corr, expected_corr)
Example #4
    def test_factor_variance(self):

        path = 'tests/data/test01.csv'
        data = pd.read_csv(path)

        fa = FactorAnalyzer()
        fa.analyze(data, 3, rotation=None)
        loadings = fa.loadings

        # number of variables, i.e. rows of the loadings matrix
        n_rows = loadings.shape[0]

        # calculate the variance explained by each factor
        # (column sums of the squared loadings)
        loadings = loadings**2
        variance = loadings.sum(axis=0)

        # calculate proportional variance
        proportional_variance_expected = variance / n_rows
        proportional_variance = fa.get_factor_variance().loc['Proportion Var']

        # clear the series names so that only the values are compared
        proportional_variance_expected.name = ''
        proportional_variance.name = ''

        assert_almost_equal(proportional_variance_expected,
                            proportional_variance)
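The test above relies on the proportional variance of a factor being the sum of its squared loadings divided by the number of observed variables. Below is a minimal sketch of that arithmetic on a small, made-up loadings matrix; the loadings values and column names are purely illustrative.

import pandas as pd

# hypothetical loadings of 4 variables on 2 factors (illustrative values)
loadings = pd.DataFrame([[0.8, 0.1],
                         [0.7, 0.2],
                         [0.1, 0.9],
                         [0.2, 0.6]],
                        columns=['Factor1', 'Factor2'])

# number of observed variables
n_variables = loadings.shape[0]

# variance explained by each factor: column sums of squared loadings
ss_loadings = (loadings ** 2).sum(axis=0)

# proportional variance: variance divided by the number of variables
proportion_var = ss_loadings / n_variables

print(proportion_var)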
Example #5
import argparse
import logging
import os

import pandas as pd

from factor_analyzer import FactorAnalyzer


def main():
    """Run the script."""

    # set up an argument parser
    parser = argparse.ArgumentParser(prog='factor_analyzer.py')
    parser.add_argument(
        dest='feature_file',
        help="Input file containing the pre-processed features "
        "for the training data")
    parser.add_argument(
        dest='output_dir',
        help="Output directory to save "
        "the output files",
    )
    parser.add_argument('-f',
                        '--factors',
                        dest="num_factors",
                        type=int,
                        default=3,
                        help="Number of factors to use (Default 3)",
                        required=False)

    parser.add_argument('-r',
                        '--rotation',
                        dest="rotation",
                        type=str,
                        default='none',
                        help="The rotation to perform (Default 'none')",
                        required=False)

    parser.add_argument('-m',
                        '--method',
                        dest="method",
                        type=str,
                        default='minres',
                        help="The method to use (Default 'minres')",
                        required=False)

    # parse given command line arguments
    args = parser.parse_args()

    method = args.method
    factors = args.num_factors
    rotation = None if args.rotation == 'none' else args.rotation

    file_path = args.feature_file

    if not file_path.lower().endswith('.csv'):
        raise ValueError('The feature file must be in CSV format.')

    data = pd.read_csv(file_path)

    # get the logger and make informational messages visible
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # log a message so that the user knows the analysis has started
    logger.info(
        "Starting exploratory factor analysis on: {}.".format(file_path))

    # run the analysis
    analyzer = FactorAnalyzer()
    analyzer.analyze(data, factors, rotation, method)

    # create output paths for the loadings, eigenvalues, communalities, and variance files
    path_loadings = os.path.join(args.output_dir, 'loadings.csv')
    path_eigen = os.path.join(args.output_dir, 'eigenvalues.csv')
    path_communalities = os.path.join(args.output_dir, 'communalities.csv')
    path_variance = os.path.join(args.output_dir, 'variance.csv')

    # retrieve loadings, eigenvalues, communalities, variance
    loadings = analyzer.loadings
    eigen, _ = analyzer.get_eigenvalues()
    communalities = analyzer.get_communalities()
    variance = analyzer.get_factor_variance()

    # save the files
    logger.info("Saving files...")
    loadings.to_csv(path_loadings)
    eigen.to_csv(path_eigen)
    communalities.to_csv(path_communalities)
    variance.to_csv(path_variance)
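For reference, the end-to-end flow of the script above boils down to a few calls. The sketch below mirrors it in interactive form, assuming the analyze()-based API used throughout these examples; 'features.csv' is a placeholder path.

import pandas as pd

from factor_analyzer import FactorAnalyzer

# 'features.csv' is a placeholder; substitute the path to your own data
data = pd.read_csv('features.csv')

# fit three factors without rotation using the minres method,
# matching the defaults of the script above
fa = FactorAnalyzer()
fa.analyze(data, 3, rotation=None, method='minres')

# retrieve the main outputs, as in the script above
loadings = fa.loadings
eigenvalues, _ = fa.get_eigenvalues()
communalities = fa.get_communalities()
variance = fa.get_factor_variance()

print(loadings)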