def test_dict_learning_online_positivity(transform_algorithm,
                                         positive_code,
                                         positive_dict):
    rng = np.random.RandomState(0)
    n_components = 8

    dico = MiniBatchDictionaryLearning(
        n_components, transform_algorithm=transform_algorithm, random_state=0,
        positive_code=positive_code, positive_dict=positive_dict).fit(X)
    code = dico.transform(X)
    if positive_dict:
        assert_true((dico.components_ >= 0).all())
    else:
        assert_true((dico.components_ < 0).any())
    if positive_code:
        assert_true((code >= 0).all())
    else:
        assert_true((code < 0).any())

    code, dictionary = dict_learning_online(X, n_components=n_components,
                                            alpha=1, random_state=rng,
                                            positive_dict=positive_dict,
                                            positive_code=positive_code)
    if positive_dict:
        assert_true((dictionary >= 0).all())
    else:
        assert_true((dictionary < 0).any())
    if positive_code:
        assert_true((code >= 0).all())
    else:
        assert_true((code < 0).any())
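# The test snippets in this collection reference module-level fixtures named
# X, n_samples and n_features. A minimal sketch of that setup (the exact
# shapes are an assumption; the tests only require that
# X.shape == (n_samples, n_features)):
import numpy as np

rng_global = np.random.RandomState(0)
n_samples, n_features = 10, 8
X = rng_global.randn(n_samples, n_features)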
def test_dict_learning_online_numerical_consistency(method):
    # verify numerical consistency between np.float32 and np.float64
    rtol = 1e-4
    n_components = 4
    alpha = 1

    U_64, V_64 = dict_learning_online(
        X.astype(np.float64),
        n_components=n_components,
        alpha=alpha,
        random_state=0,
        method=method,
    )
    U_32, V_32 = dict_learning_online(
        X.astype(np.float32),
        n_components=n_components,
        alpha=alpha,
        random_state=0,
        method=method,
    )

    # The optimal solution (U*, V*) is not unique.
    # If (U*, V*) is an optimal solution, (-U*, -V*) is also optimal,
    # and (column-permuted U*, row-permuted V*) is optimal as well,
    # as long as the product UV is preserved.
    # So here UV, ||U||_1,1 and sum(||V_k||_2^2) are verified
    # instead of comparing U and V directly.
    assert_allclose(np.matmul(U_64, V_64), np.matmul(U_32, V_32), rtol=rtol)
    assert_allclose(np.sum(np.abs(U_64)), np.sum(np.abs(U_32)), rtol=rtol)
    assert_allclose(np.sum(V_64 ** 2), np.sum(V_32 ** 2), rtol=rtol)

    # verify that the obtained solution is not degenerate
    assert np.mean(U_64 != 0.0) > 0.05
    assert np.count_nonzero(U_64 != 0.0) == np.count_nonzero(U_32 != 0.0)
def test_dict_learning_online(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    result = df.decomposition.dict_learning_online(
        random_state=self.random_state)
    expected = decomposition.dict_learning_online(
        iris.data, random_state=self.random_state)

    self.assertEqual(len(result), 2)
    self.assertIsInstance(result[0], pdml.ModelFrame)
    tm.assert_index_equal(result[0].index, df.data.index)
    self.assert_numpy_array_almost_equal(result[0].values, expected[0])

    self.assertIsInstance(result[1], pdml.ModelFrame)
    tm.assert_index_equal(result[1].columns, df.data.columns)
    self.assert_numpy_array_almost_equal(result[1].values, expected[1])

    result = df.decomposition.dict_learning_online(
        return_code=False, random_state=self.random_state)
    expected = decomposition.dict_learning_online(
        iris.data, return_code=False, random_state=self.random_state)

    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_index_equal(result.columns, df.data.columns)
    self.assert_numpy_array_almost_equal(result.values, expected)
def test_dict_learning_online_shapes():
    rng = np.random.RandomState(0)
    n_components = 8

    code, dictionary = dict_learning_online(
        X,
        n_components=n_components,
        batch_size=4,
        max_iter=10,
        random_state=rng,
        return_code=True,
    )
    assert code.shape == (n_samples, n_components)
    assert dictionary.shape == (n_components, n_features)
    assert np.dot(code, dictionary).shape == X.shape

    dictionary = dict_learning_online(
        X,
        n_components=n_components,
        batch_size=4,
        max_iter=10,
        random_state=rng,
        return_code=False,
    )
    assert dictionary.shape == (n_components, n_features)
def test_dict_learning_online_verbosity():
    # test verbosity
    n_components = 5
    from io import StringIO
    import sys

    old_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1,
                                           random_state=0)
        dico.fit(X)
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2,
                                           random_state=0)
        dico.fit(X)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=1,
                             random_state=0)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=2,
                             random_state=0)
    finally:
        sys.stdout = old_stdout

    assert dico.components_.shape == (n_components, n_features)
def test_dict_learning_online_verbosity():
    n_components = 5
    # test verbosity
    from sklearn.externals.six.moves import cStringIO as StringIO
    import sys

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1,
                                       random_state=0)
    dico.fit(X)
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2,
                                       random_state=0)
    dico.fit(X)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=1,
                         random_state=0)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=2,
                         random_state=0)
    sys.stdout = old_stdout

    assert_true(dico.components_.shape == (n_components, n_features))
def test_batch_size_default_value_future_warning():
    # Check that a FutureWarning is raised if batch_size is left to its
    # default value.
    # FIXME: remove in 1.3
    msg = "The default value of batch_size will change"
    with pytest.warns(FutureWarning, match=msg):
        dict_learning_online(X, n_components=2, random_state=0)

    with pytest.warns(FutureWarning, match=msg):
        MiniBatchDictionaryLearning(n_components=2, random_state=0).fit(X)
def test_dict_learning_online_verbosity():
    # test verbosity for better coverage
    n_components = 5
    from io import StringIO
    import sys

    old_stdout = sys.stdout
    try:
        sys.stdout = StringIO()

        # convergence monitoring verbosity
        dico = MiniBatchDictionaryLearning(
            n_components, batch_size=4, max_iter=5, verbose=1, tol=0.1,
            random_state=0)
        dico.fit(X)
        dico = MiniBatchDictionaryLearning(
            n_components,
            batch_size=4,
            max_iter=5,
            verbose=1,
            max_no_improvement=2,
            random_state=0,
        )
        dico.fit(X)

        # higher verbosity level
        dico = MiniBatchDictionaryLearning(
            n_components, batch_size=4, max_iter=5, verbose=2, random_state=0)
        dico.fit(X)

        # function API verbosity
        dict_learning_online(
            X,
            n_components=n_components,
            batch_size=4,
            alpha=1,
            verbose=1,
            random_state=0,
        )
        dict_learning_online(
            X,
            n_components=n_components,
            batch_size=4,
            alpha=1,
            verbose=2,
            random_state=0,
        )
    finally:
        sys.stdout = old_stdout

    assert dico.components_.shape == (n_components, n_features)
def test_dict_learning_online_deprecated_args(arg, val):
    # check the deprecation warning for the deprecated args of
    # dict_learning_online
    # FIXME: remove in 1.3
    depr_msg = (
        f"'{arg}' is deprecated in version 1.1 and will be removed in version 1.3."
    )
    with pytest.warns(FutureWarning, match=depr_msg):
        dict_learning_online(
            X, n_components=2, batch_size=4, random_state=0, **{arg: val}
        )
def test_dict_learning_online_verbosity():
    n_components = 5
    # test verbosity
    from cStringIO import StringIO
    import sys

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1)
    dico.fit(X)
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2)
    dico.fit(X)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=1)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=2)
    sys.stdout = old_stdout

    assert_true(dico.components_.shape == (n_components, n_features))
def test_dict_learning_online_shapes():
    rng = np.random.RandomState(0)
    n_components = 8
    code, dictionary = dict_learning_online(X, n_components=n_components,
                                            alpha=1, random_state=rng)
    assert_equal(code.shape, (n_samples, n_components))
    assert_equal(dictionary.shape, (n_components, n_features))
    assert_equal(np.dot(code, dictionary).shape, X.shape)
def test_dict_learning_online_shapes():
    rng = np.random.RandomState(0)
    n_components = 8
    code, dictionary = dict_learning_online(X, n_components=n_components,
                                            alpha=1, random_state=rng)
    assert code.shape == (n_samples, n_components)
    assert dictionary.shape == (n_components, n_features)
    assert np.dot(code, dictionary).shape == X.shape
def test_dict_learning_online_shapes():
    # rng was commented out in the original but is used below; restored.
    rng = np.random.RandomState(0)
    # X = rng.randn(12, 10)
    n_atoms = 8
    code, dictionary = dict_learning_online(X, n_atoms=n_atoms, alpha=1,
                                            random_state=rng)
    assert_equal(code.shape, (n_samples, n_atoms))
    assert_equal(dictionary.shape, (n_atoms, n_features))
    assert_equal(np.dot(code, dictionary).shape, X.shape)
def make_dictionary(X, n_components=20, alpha=5., write_dir='/tmp/',
                    contrasts=[], method='multitask', l1_ratio=.5,
                    n_subjects=13):
    """Create dictionary + encoding"""
    from sklearn.decomposition import dict_learning_online, sparse_encode
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import MultiTaskLasso, MultiTaskElasticNet

    mem = Memory(write_dir, verbose=0)
    dictionary = mem.cache(initial_dictionary)(n_components, X)
    np.savez(os.path.join(write_dir, 'dictionary.npz'),
             loadings=dictionary, contrasts=contrasts)
    if method == 'online':
        components, dictionary = dict_learning_online(
            X.T, n_components, alpha=alpha, dict_init=dictionary,
            batch_size=200, method='cd', return_code=True, shuffle=True,
            n_jobs=1, positive_code=True)
        np.savez(os.path.join(write_dir, 'dictionary.npz'),
                 loadings=dictionary, contrasts=contrasts)
    elif method == 'sparse':
        components = sparse_encode(
            X.T, dictionary, alpha=alpha, max_iter=10, n_jobs=1,
            check_input=True, verbose=0, positive=True)
    elif method == 'multitask':
        # too many hard-coded parameters!
        n_voxels = X.shape[1] // n_subjects
        components = np.zeros((X.shape[1], n_components))
        # note: the second assignment overrides the MultiTaskLasso estimator
        clf = MultiTaskLasso(alpha=alpha)
        clf = MultiTaskElasticNet(alpha=alpha, l1_ratio=l1_ratio)
        for i in range(n_voxels):
            x = X[:, i:i + n_subjects * n_voxels:n_voxels]
            components[i: i + n_subjects * n_voxels: n_voxels] = \
                clf.fit(dictionary.T, x).coef_
    return dictionary, components
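# Hypothetical usage sketch for make_dictionary above (illustrative only:
# X_group, write_dir and the helper initial_dictionary are assumed to be
# defined in the surrounding module, so this is kept commented out):
# dictionary, components = make_dictionary(
#     X_group, n_components=20, alpha=5., write_dir='/tmp/', method='online')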
def test_dict_learning_online_dtype_match(data_type, expected_type, method):
    # Verify output matrix dtype
    rng = np.random.RandomState(0)
    n_components = 8
    code, dictionary = dict_learning_online(
        X.astype(data_type),
        n_components=n_components,
        alpha=1,
        random_state=rng,
        method=method,
    )
    assert code.dtype == expected_type
    assert dictionary.dtype == expected_type
def main(output, image_files):
    log.info('starting with {} image files'.format(len(image_files)))
    images = (prep(i) for i in image_files)
    log.info('starting online dictionary learning')
    D = None
    for image in images:
        D = dict_learning_online(image, dict_init=D, n_components=2000,
                                 verbose=True, n_jobs=-1, n_iter=N_ITER,
                                 batch_size=BATCH_SIZE, return_code=False)
    output.write(pd.DataFrame(D).to_csv())
    log.info('done')
def test_dict_learning_online_positivity(positive_code, positive_dict):
    rng = np.random.RandomState(0)
    n_components = 8

    code, dictionary = dict_learning_online(X, n_components=n_components,
                                            method="cd", alpha=1,
                                            random_state=rng,
                                            positive_dict=positive_dict,
                                            positive_code=positive_code)
    if positive_dict:
        assert (dictionary >= 0).all()
    else:
        assert (dictionary < 0).any()
    if positive_code:
        assert (code >= 0).all()
    else:
        assert (code < 0).any()
def fit(self, X, y=None):
    """Fit the model from data in X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    self : object
        Returns the object itself.
    """
    from sklearn.decomposition import dict_learning_online

    code_, dict_ = dict_learning_online(X, n_components=self.n_components,
                                        alpha=self.alpha, n_iter=self.n_iter)
    self.code_ = code_
    self.dict_ = dict_
    return self
def test_dict_learning_online_lars_positive_parameter():
    alpha = 1
    err_msg = "Positive constraint not supported for 'lars' coding method."
    with pytest.raises(ValueError, match=err_msg):
        dict_learning_online(X, alpha=alpha, positive_code=True)
    shape_data = np.load(
        '/media/keyi/Data/Research/course_project/AdvancedCV_2020/data/COCO17/shape_val2017_128.npy'
    )
else:
    print('Not implemented, try n_vertices: 16, 32, 64')
    exit()

out_dict = '/media/keyi/Data/Research/course_project/AdvancedCV_2020/data/COCO17/shape_codes/sparsity/dict_val2017_v{}_b{}_alpha{}.npy'.format(
    n_vertices, n_atom, alpha)
out_code = '/media/keyi/Data/Research/course_project/AdvancedCV_2020/data/COCO17/shape_codes/sparsity/code_val2017_v{}_b{}_alpha{}.npy'.format(
    n_vertices, n_atom, alpha)

n_dim, n_data = shape_data.shape
print('Shape data dims: ', shape_data.shape)

# dict_learner = DictionaryLearning(n_components=n_atom, alpha=1., max_iter=500)
learned_codes, learned_dict = dict_learning_online(
    np.transpose(shape_data), n_components=n_atom, alpha=alpha, n_iter=2000,
    batch_size=50, return_code=True)
print("Learned dictionary dim: ", learned_dict.shape)
print("Learned codes dim: ", learned_codes.shape)

# calculate the reconstruction error
error = np.sum(
    (np.matmul(learned_codes, learned_dict) - np.transpose(shape_data)) ** 2
) / n_data
print('reconstruction error (frobenius): ', error)

np.save(out_dict, learned_dict)
np.save(out_code, learned_codes)
def test_dict_learning_online_lars_positive_parameter():
    err_msg = "Positive constraint not supported for 'lars' coding method."
    with pytest.raises(ValueError, match=err_msg):
        dict_learning_online(X, batch_size=4, max_iter=10, positive_code=True)
def test_dict_learning_online_n_iter_deprecated():
    # Check that an error is raised when a deprecated argument is passed
    # together with max_iter.
    msg = "The following arguments are incompatible with 'max_iter'"
    with pytest.raises(ValueError, match=msg):
        dict_learning_online(X, max_iter=10, return_inner_stats=True)
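# For reference, a call using the post-deprecation API exercised in the tests
# above (n_components / batch_size / max_iter; a sketch, the values are
# illustrative):
code, dictionary = dict_learning_online(
    X, n_components=2, batch_size=4, max_iter=10, random_state=0)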