def test_compute_pca_less_components_than_features(self):
    # Test PCA when the expected number of components (100) is smaller
    # than the number of feature columns (101: 'a' plus 0..99).
    base = pd.Series(range(100))
    # Build every column up front and concatenate once. Inserting 100
    # columns one by one into an existing frame fragments it and makes
    # pandas emit a PerformanceWarning.
    frames = [pd.DataFrame({'a': base})]
    frames.extend(pd.DataFrame({i: base * i}) for i in range(100))
    df = pd.concat(frames, axis=1)
    (components, variance) = Analyzer.compute_pca(df, df.columns)
    # With 100 rows available, both returned frames are expected to
    # have 100 columns.
    assert_equal(len(components.columns), 100)
    assert_equal(len(variance.columns), 100)
def test_compute_pca_less_samples_than_features(self):
    # Test PCA when we have fewer samples (50 rows) than features
    # (101 columns: 'a' plus 0..99). In this case the number of
    # components is expected to equal the number of samples.
    #
    # NOTE(review): a second definition with this exact name follows in
    # the visible source; if both live in the same class, the later one
    # replaces this one and this test never runs -- confirm, then
    # rename or remove one of them.
    base = pd.Series(range(50))
    # Build every column up front and concatenate once. Inserting 100
    # columns one by one into an existing frame fragments it and makes
    # pandas emit a PerformanceWarning.
    frames = [pd.DataFrame({'a': base})]
    frames.extend(pd.DataFrame({i: base * i}) for i in range(100))
    df = pd.concat(frames, axis=1)
    (components, variance) = Analyzer.compute_pca(df, df.columns)
    assert_equal(len(components.columns), 50)
    assert_equal(len(variance.columns), 50)
def test_compute_pca_less_samples_than_features(self):
    # PCA with fewer samples (50 rows) than features (100 columns,
    # labelled 1..100): the test expects the number of components to
    # match the number of samples.
    #
    # Each feature is built as its own single-column frame and all of
    # them are joined in one concat call, so that columns are not
    # inserted into an existing frame one at a time.
    samples = pd.Series(range(50))
    column_frames = [pd.DataFrame({k: samples * k}) for k in range(1, 101)]
    df = pd.concat(column_frames, axis=1)
    components, variance = Analyzer.compute_pca(df, df.columns)
    assert_equal(len(components.columns), 50)
    assert_equal(len(variance.columns), 50)