def select(self, data_matrix, target=None):
    """Return the ids of instances picked via an SIVM factorization.

    Args:
        data_matrix: Input matrix. If it is a scipy sparse matrix it is
            first reduced with a default ``SparseRandomProjection`` and
            converted to a dense array.
        target: Accepted for interface compatibility; not used here.

    Returns:
        Whatever ``self._get_ids`` returns for the (possibly projected)
        data matrix and the transposed SIVM basis ``W``.
    """
    if sparse.issparse(data_matrix):
        projector = SparseRandomProjection()
        data_matrix = projector.fit_transform(data_matrix).toarray()

    # The factorizer is fed the transpose of the data matrix
    # (presumably pymf expects features x samples -- TODO confirm).
    factorizer = pymf.SIVM(data_matrix.T, num_bases=self.n_instances)
    factorizer.factorize()

    # Transpose W back so the basis has the same orientation as data_matrix.
    return self._get_ids(data_matrix, factorizer.W.T)
def transform(self, data_matrix):
    """Express ``data_matrix`` in terms of the previously stored basis.

    The data is passed through ``self.transformer``, a new ``pymf.SIVM``
    is built on the transposed result, the basis ``W`` of the previous
    factorizer is copied into it, and ``factorize(compute_w=False)`` is
    run. The new factorizer replaces ``self.matrix_factorizer``.

    Args:
        data_matrix: Matrix accepted by ``self.transformer.transform``.

    Returns:
        The transposed ``H`` matrix of the new factorizer, additionally
        passed through ``self.kmeans.transform`` when ``self.n_kmeans``
        is truthy.
    """
    # Grab the existing basis before the factorizer is replaced.
    stored_basis = self.matrix_factorizer.W
    projected = self.transformer.transform(data_matrix)

    self.matrix_factorizer = factorizer = pymf.SIVM(
        projected.T, num_bases=self.complexity)
    factorizer.W = stored_basis
    # compute_w=False is passed so that the injected basis is reused
    # (presumably only H is recomputed -- TODO confirm against pymf).
    factorizer.factorize(compute_w=False)

    if not self.n_kmeans:
        return factorizer.H.T
    return self.kmeans.transform(factorizer.H.T)
def fit(self, data_matrix):
    """Fit the random projection, the SIVM factorizer and optional k-means.

    Sets ``self.transformer`` and ``self.matrix_factorizer``; when
    ``self.n_kmeans`` is truthy, also sets and fits ``self.kmeans`` on
    the transposed ``H`` matrix of the factorizer.

    Args:
        data_matrix: Two-dimensional matrix exposing ``.shape``.
    """
    n_rows, n_cols = data_matrix.shape

    # Use the smaller dimension as projection size, except when there are
    # more rows than columns and at least 5000 columns: then let the
    # projection choose the size itself via 'auto'.
    if n_rows > n_cols and n_cols >= 5000:
        n_components = 'auto'
    else:
        n_components = n_rows if n_rows <= n_cols else n_cols

    self.transformer = random_projection.SparseRandomProjection(
        n_components=n_components,
        dense_output=True,
        random_state=self.random_state,
    )
    projected = self.transformer.fit_transform(data_matrix)

    self.matrix_factorizer = pymf.SIVM(projected.T,
                                       num_bases=self.complexity)
    self.matrix_factorizer.factorize()

    if not self.n_kmeans:
        return
    self.kmeans = MiniBatchKMeans(n_clusters=self.n_kmeans)
    self.kmeans.fit(self.matrix_factorizer.H.T)
def matrix_factorization(data_matrix, n=10):
    """Factorize ``data_matrix`` with ``pymf.SIVM`` using ``n`` bases.

    Args:
        data_matrix: Matrix exposing ``.T``; its transpose is handed to
            the factorizer.
        n: Value passed as ``num_bases`` (default 10).

    Returns:
        A tuple ``(W.T, H.T)`` of the factorizer's transposed ``W`` and
        ``H`` attributes after ``factorize()``.
    """
    factorizer = pymf.SIVM(data_matrix.T, num_bases=n)
    factorizer.factorize()
    basis = factorizer.W.T
    coefficients = factorizer.H.T
    return basis, coefficients
def select_layer(self, data_matrix):
    """Return the ids of instances picked via an SIVM factorization.

    Args:
        data_matrix: Matrix exposing ``.T``; its transpose is handed to
            ``pymf.SIVM`` with ``self.n_instances`` bases.

    Returns:
        Whatever ``self._get_ids`` returns for ``data_matrix`` and the
        transposed SIVM basis ``W``.
    """
    factorizer = pymf.SIVM(data_matrix.T, num_bases=self.n_instances)
    factorizer.factorize()
    # Transpose W back so the basis has the same orientation as data_matrix.
    return self._get_ids(data_matrix, factorizer.W.T)