Ejemplo n.º 1
0
 def select(self, data_matrix, target=None):
     """Pick instance ids through an SIVM factorization of ``data_matrix``.

     Sparse input is first pushed through a default-configured
     ``SparseRandomProjection`` and converted to a dense array; that
     projected matrix replaces the input for every later step, including
     the id lookup.

     ``target`` is accepted but not used by this method.

     Returns whatever ``self._get_ids`` produces for the (possibly
     projected) data and the transposed ``W`` factor.
     """
     if sparse.issparse(data_matrix):
         projector = SparseRandomProjection()
         projected = projector.fit_transform(data_matrix)
         data_matrix = projected.toarray()

     # The factorizer is handed the transpose of the data matrix and its
     # W factor is transposed back before the id lookup.
     factorizer = pymf.SIVM(data_matrix.T, num_bases=self.n_instances)
     factorizer.factorize()
     return self._get_ids(data_matrix, factorizer.W.T)
Ejemplo n.º 2
0
 def transform(self, data_matrix):
     """Encode ``data_matrix`` against the previously stored ``W`` factor.

     The input is mapped through ``self.transformer``, a fresh
     ``pymf.SIVM`` is built on the transposed result, the stored ``W`` is
     copied onto it, and ``factorize`` is run with ``compute_w=False``.

     Side effect: ``self.matrix_factorizer`` is replaced by the new
     factorizer (carrying the old ``W``).

     Returns the transposed ``H`` factor, passed through
     ``self.kmeans.transform`` when ``self.n_kmeans`` is truthy.
     """
     # Grab W before the factorizer attribute is overwritten below.
     stored_basis = self.matrix_factorizer.W
     projected = self.transformer.transform(data_matrix)

     refreshed = pymf.SIVM(projected.T, num_bases=self.complexity)
     self.matrix_factorizer = refreshed
     refreshed.W = stored_basis
     refreshed.factorize(compute_w=False)

     if not self.n_kmeans:
         return refreshed.H.T
     return self.kmeans.transform(refreshed.H.T)
Ejemplo n.º 3
0
 def fit(self, data_matrix):
     """Fit the random projection, the SIVM factorizer and optional k-means.

     ``data_matrix`` must expose a two-element ``shape``. The projection
     size is chosen from that shape:

     * rows <= columns: one component per row;
     * otherwise, fewer than 5000 columns: one component per column;
     * otherwise: ``'auto'``.

     Sets ``self.transformer`` and ``self.matrix_factorizer``; when
     ``self.n_kmeans`` is truthy, also sets ``self.kmeans``, fitted on the
     transposed ``H`` factor.
     """
     n_rows, n_cols = data_matrix.shape
     if n_rows <= n_cols:
         n_components = n_rows
     elif n_cols < 5000:
         n_components = n_cols
     else:
         n_components = 'auto'

     self.transformer = random_projection.SparseRandomProjection(
         n_components=n_components,
         dense_output=True,
         random_state=self.random_state)
     data_matrix_new = self.transformer.fit_transform(data_matrix)

     # The factorizer is handed the transpose of the projected matrix.
     self.matrix_factorizer = pymf.SIVM(data_matrix_new.T,
                                        num_bases=self.complexity)
     self.matrix_factorizer.factorize()

     if self.n_kmeans:
         # Seed the clustering with the same random_state as the projection;
         # otherwise a fixed seed still gives run-to-run different clusters.
         self.kmeans = MiniBatchKMeans(n_clusters=self.n_kmeans,
                                       random_state=self.random_state)
         self.kmeans.fit(self.matrix_factorizer.H.T)
Ejemplo n.º 4
0
def matrix_factorization(data_matrix, n=10):
    """Factorize ``data_matrix`` with ``pymf.SIVM`` using ``n`` bases.

    The factorizer is built on the transpose of ``data_matrix`` and both
    resulting factors are transposed back before being returned.

    Returns a ``(W.T, H.T)`` tuple.
    """
    factorizer = pymf.SIVM(data_matrix.T, num_bases=n)
    factorizer.factorize()
    basis = factorizer.W.T
    coefficients = factorizer.H.T
    return basis, coefficients
Ejemplo n.º 5
0
 def select_layer(self, data_matrix):
     """Pick instance ids through an SIVM factorization of ``data_matrix``.

     A ``pymf.SIVM`` with ``self.n_instances`` bases is run on the
     transposed data; its transposed ``W`` factor and the original data
     are then handed to ``self._get_ids``, whose result is returned.
     """
     factorizer = pymf.SIVM(data_matrix.T, num_bases=self.n_instances)
     factorizer.factorize()
     return self._get_ids(data_matrix, factorizer.W.T)