Beispiel #1
0
class PCAImputer:
    """Impute missing (NaN) entries of a 2-D array by iterated PCA reconstruction.

    The number of latent dimensions passed to the underlying ``PPCA`` model is
    fixed at construction time.
    """

    def __init__(self, n_dimension):
        """Store the number of components handed to ``PPCA(n_dimension=...)``."""
        self._q = n_dimension

    @staticmethod
    def _fill_with_row_means(data, missing):
        """Overwrite ``data[missing]`` in place with the mean of each row's observed values.

        Rows without any observed value are filled with 0.0.  (``np.nanmean``
        would yield NaN for such rows, plus a RuntimeWarning, and that NaN would
        then be fed into the PCA fit.)

        Args:
            data: 2-D array to modify in place.
            missing: boolean mask of the same shape, True where a value is missing.

        Returns:
            The same ``data`` array, for convenience.
        """
        observed_count = (~missing).sum(axis=1)
        # Sum only the observed entries; missing ones contribute 0 to the sum.
        row_sums = np.where(missing, 0.0, data).sum(axis=1)
        # Divide only where at least one value was observed; elsewhere keep the 0.0 default.
        row_means = np.divide(row_sums,
                              observed_count,
                              out=np.zeros(data.shape[0]),
                              where=observed_count > 0)
        # Broadcast the per-row mean across columns without materialising a full copy.
        data[missing] = np.broadcast_to(row_means.reshape(-1, 1), data.shape)[missing]
        return data

    def fit_transform(self,
                      data,
                      method='eig',
                      probabilistic=False,
                      n_iteration=100):
        """Fit a PCA to ``data`` while iteratively re-estimating its missing entries.

        Every missing (NaN) entry is initialised with the mean of the observed
        values in its row (0.0 when the whole row is missing).  Then, for
        ``n_iteration`` rounds, a ``PPCA`` model is fitted to the completed data
        and the missing entries are replaced by the model's reconstruction
        (``inverse_transform(transform(...))``).  Observed entries are never
        modified.

        Args:
            data: 2-D array with NaN marking missing entries.  It is copied, so
                the caller's array is left untouched.
            method: forwarded to ``PPCA.fit`` as ``method=``.
            probabilistic: forwarded positionally to ``PPCA.transform`` and
                ``PPCA.inverse_transform``.
            n_iteration: number of fit/reconstruct rounds.

        Returns:
            The completed array.  This is the same object kept in ``self._data``.
        """
        self._data = data.copy()
        self._missing = np.isnan(data)
        self._observed = ~self._missing
        self._pca = PPCA(n_dimension=self._q)

        self._fill_with_row_means(self._data, self._missing)

        for _ in range(n_iteration):
            self._pca.fit(self._data, method=method)
            latent = self._pca.transform(self._data, probabilistic)
            reconstruction = self._pca.inverse_transform(latent, probabilistic)
            # Only the originally-missing cells take the reconstructed values.
            self._data[self._missing] = reconstruction[self._missing]
        return self._data
Beispiel #2
0
    # Show the figure prepared by the preceding statements (outside this excerpt).
    plt.show()
    
    # --- Log-likelihood check ------------------------------------------------
    # Fit a 2-component PPCA with method='EM' and keep_loglikes=True, then plot
    # whatever fit() returns.
    # NOTE(review): presumably one log-likelihood value per EM iteration --
    # confirm against PPCA.fit.
    print('\n\n\n\n**TEST CALCULATING LIKELIHOOD**')
    ppca1 = PPCA(n_dimension=2)
    loglikelihoods = ppca1.fit(data, method='EM', keep_loglikes=True)
    plt.plot(loglikelihoods)
    plt.show()

    # --- Dimension reduction and reconstruction ------------------------------
    # Render the original matrix first as a visual reference for the
    # reconstructions below.
    print('\n\n\n\n**TEST DIMENSION REDUCTION AND RECOVERING**')
    plt.matshow(data)
    print('\n\noriginal data')
    plt.show()
    
    # Reconstruction through 2 components, fitted with method='EM' and default
    # fit settings.
    ppca3 = PPCA(n_dimension=2)
    ppca3.fit(data, method='EM')
    plt.matshow( ppca3.inverse_transform( ppca3.transform(data) ) )
    print('\n\nrecovered data: 2-component')
    plt.show()
    
    # Same 2-component model, but fitted with batchsize=16 and n_iteration=2000
    # (the "mini-batch" variant named in the message below).
    ppca4 = PPCA(n_dimension=2)
    ppca4.fit(data, batchsize=16, n_iteration=2000, method='EM')
    plt.matshow( ppca4.inverse_transform( ppca4.transform(data) ) )
    print('\n\nrecovered data: 2-component (mini-batch)')
    plt.show()
    
    # Reconstruction through 63 components.
    # NOTE(review): presumably `data` has 64 features, so this is a near-full-rank
    # reconstruction -- confirm against where `data` is loaded.
    ppca5 = PPCA(n_dimension=63)
    ppca5.fit(data, method='EM')
    plt.matshow( ppca5.inverse_transform( ppca5.transform(data) ) )
    print('\n\nrecovered data: 63-component')
    plt.show()