def test_analyze_infinite(self):
    """Analyze data that contains an infinite value, using drop imputation."""
    frame = pd.DataFrame({'A': [2, 4, 5, 6, 8, 9],
                          'B': [4, 8, float('inf'), 10, 16, 18],
                          'C': [6, 12, 15, 12, 26, 27]})
    analyzer = FactorAnalyzer()
    analyzer.analyze(frame, 1, impute='drop')
def test_analyze_rotation_value_error(self):
    """Analyze NaN-containing data with an unrecognized rotation name."""
    frame = pd.DataFrame({'A': [2, 4, 5, 6, 8, 9],
                          'B': [4, 8, np.nan, 10, 16, 18],
                          'C': [6, 12, 15, 12, 26, 27]})
    analyzer = FactorAnalyzer()
    analyzer.analyze(frame, 1, rotation='blah')
def test_analyze_impute_drop(self):
    """Drop imputation should use the correlation matrix of the complete rows."""
    frame = pd.DataFrame({'A': [2, 4, 5, 6, 8, 9],
                          'B': [4, 8, np.nan, 10, 16, 18],
                          'C': [6, 12, 15, 12, 26, 27]})

    # the expected correlations come from the rows with no missing values
    expected_corr = frame.dropna().corr()

    analyzer = FactorAnalyzer()
    analyzer.analyze(frame, 1, rotation=None, impute='drop')
    assert_frame_equal(analyzer.corr, expected_corr)
def test_factor_variance(self):
    """Proportional variance should equal sum of squared loadings over n rows."""
    frame = pd.read_csv('tests/data/test01.csv')

    analyzer = FactorAnalyzer()
    analyzer.analyze(frame, 3, rotation=None)

    # variance per factor is the column sum of the squared loadings;
    # dividing by the number of variables gives the proportional variance
    squared_loadings = analyzer.loadings ** 2
    num_variables = squared_loadings.shape[0]
    expected_proportion = squared_loadings.sum(axis=0) / num_variables

    actual_proportion = analyzer.get_factor_variance().loc['Proportion Var']

    # clear the series names so only the values are compared
    expected_proportion.name = ''
    actual_proportion.name = ''
    assert_almost_equal(expected_proportion, actual_proportion)
def main():
    """
    Run the script.

    Parse the command-line arguments, read the input CSV feature file,
    run an exploratory factor analysis on it, and write the loadings,
    eigenvalues, communalities, and factor variance out as CSV files
    in the given output directory.

    Raises
    ------
    ValueError
        If the feature file is not in CSV format.
    """
    # set up an argument parser
    parser = argparse.ArgumentParser(prog='factor_analyzer.py')
    parser.add_argument(dest='feature_file',
                        help="Input file containing the pre-processed features "
                             "for the training data")
    parser.add_argument(dest='output_dir',
                        help="Output directory to save "
                             "the output files")
    parser.add_argument('-f', '--factors', dest="num_factors", type=int,
                        default=3, required=False,
                        help="Number of factors to use (Default 3)")
    parser.add_argument('-r', '--rotation', dest="rotation", type=str,
                        default='none', required=False,
                        help="The rotation to perform (Default 'none')")
    parser.add_argument('-m', '--method', dest="method", type=str,
                        default='minres', required=False,
                        help="The method to use (Default 'minres')")

    # parse given command line arguments
    args = parser.parse_args()

    method = args.method
    factors = args.num_factors
    # the CLI uses the string 'none' to mean "no rotation"
    rotation = None if args.rotation == 'none' else args.rotation

    file_path = args.feature_file
    if not file_path.lower().endswith('.csv'):
        raise ValueError('The feature file must be in CSV format.')

    data = pd.read_csv(file_path)

    # configure logging and get the logger; the original called
    # `logging.setLevel(...)`, which does not exist at module level
    # and would raise AttributeError
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # log some useful messages so that the user knows
    logger.info(
        "Starting exploratory factor analysis on: {}.".format(file_path))

    # run the analysis
    analyzer = FactorAnalyzer()
    analyzer.analyze(data, factors, rotation, method)

    # create paths to loadings, eigenvalues, communalities, variance
    path_loadings = os.path.join(args.output_dir, 'loadings.csv')
    path_eigen = os.path.join(args.output_dir, 'eigenvalues.csv')
    path_communalities = os.path.join(args.output_dir, 'communalities.csv')
    path_variance = os.path.join(args.output_dir, 'variance.csv')

    # retrieve loadings, eigenvalues, communalities, variance
    loadings = analyzer.loadings
    eigen, _ = analyzer.get_eigenvalues()
    communalities = analyzer.get_communalities()
    variance = analyzer.get_factor_variance()

    # save the files
    logger.info("Saving files...")
    loadings.to_csv(path_loadings)
    eigen.to_csv(path_eigen)
    communalities.to_csv(path_communalities)
    variance.to_csv(path_variance)