def project_rows(self, X, preprocess=True):
    '''
    Project the rows of X onto the subspace spanned by the fitted
    eigenvectors.

    Args:
        X: the rows to project; checked with util.verify_dataframe.
        preprocess: when true, a deep copy of X is run through
            self._preprocess before projecting, so that supplementary
            rows are treated the same way the active ones were.

    Returns:
        The product of X's values with the transpose of self.svd.V.
    '''
    util.verify_dataframe(X)

    if preprocess:
        # The result of _preprocess is not used, so it presumably works in
        # place; operate on a deep copy rather than on the caller's object.
        X = X.copy(deep=True)
        self._preprocess(X)

    row_values = X.values
    eigenvectors = self.svd.V.T
    return row_values @ eigenvectors
def __init__(self, dataframe, k, plotter):
    '''
    Store the data and settle how many components the SVD will compute.

    Args:
        dataframe: the data to analyse; checked with util.verify_dataframe.
        k: requested number of components. -1 means "as many as there are
            columns"; any other value is capped at the number of columns.
        plotter: name of the plotting backend. Only 'mpl' is recognized.

    Raises:
        ValueError: if plotter is not a recognized backend name.
        Whatever util.verify_dataframe raises for an invalid dataframe.
    '''
    # Membership is tested against a tuple of backend names. Testing against
    # the bare string 'mpl' is a substring check, which would accept '',
    # 'm', 'mp', 'pl', ... and raise TypeError for non-string values.
    if plotter not in ('mpl',):
        raise ValueError('Unrecognized plotting backend; choose from: mpl')

    util.verify_dataframe(dataframe)

    self.X = dataframe
    self.n, self.p = self.X.shape
    # Determine the number of components computed during SVD
    self.k = self.p if k == -1 else min(k, self.p)
def __init__(self, dataframe, nbr_components=2, ignored_variable_names=(),
             scaled=True, plotter='mpl'):
    '''
    Set up the PCA: record the options, hand the data to the parent
    initializer, then preprocess it and compute the SVD.

    Args:
        dataframe: the data to analyse; checked with util.verify_dataframe.
        nbr_components: forwarded to the parent initializer as `k`.
        ignored_variable_names: stored on the instance as-is.
        scaled: stored on the instance as-is.
        plotter: plotting backend name, forwarded to the parent initializer
            and to self._set_plotter.
    '''
    # Options are recorded first so the helpers below can read them.
    self.scaled = scaled
    self.ignored_variable_names = ignored_variable_names

    util.verify_dataframe(dataframe)
    self._tidy(dataframe)

    super(PCA, self).__init__(
        dataframe=dataframe,
        k=nbr_components,
        plotter=plotter,
    )
    self._set_plotter(plotter)

    # self.X is only read here, after the parent initializer has run.
    self._preprocess(self.X)
    self._compute_svd()
def test_verify_dataframe_failure():
    '''
    verify_dataframe must raise a ValueError when given something that is
    not a pandas.DataFrame.
    '''
    not_a_dataframe = (1, 2, 3)
    with pytest.raises(ValueError):
        util.verify_dataframe(not_a_dataframe)
def test_verify_dataframe_success():
    '''
    verify_dataframe must not raise an exception when given a
    pandas.DataFrame.
    '''
    frame = pd.DataFrame([1, 2, 3])
    util.verify_dataframe(frame)