예제 #1
1
def test_dict_learning_online_positivity(transform_algorithm,
                                         positive_code,
                                         positive_dict):
    """Check positivity constraints for the estimator and function APIs.

    The same sign expectations are verified twice: once on a fitted
    ``MiniBatchDictionaryLearning`` and once on the output of
    ``dict_learning_online``. ``X`` comes from outside this block.
    """
    rng = np.random.RandomState(0)
    n_components = 8

    def check_signs(atoms, codes):
        # With a constraint on, every entry must be non-negative; with it
        # off, at least one negative entry is expected.
        assert_true((atoms >= 0).all() if positive_dict
                    else (atoms < 0).any())
        assert_true((codes >= 0).all() if positive_code
                    else (codes < 0).any())

    # Estimator API.
    dico = MiniBatchDictionaryLearning(
        n_components,
        transform_algorithm=transform_algorithm,
        random_state=0,
        positive_code=positive_code,
        positive_dict=positive_dict,
    ).fit(X)
    transformed = dico.transform(X)
    check_signs(dico.components_, transformed)

    # Function API.
    code, dictionary = dict_learning_online(
        X,
        n_components=n_components,
        alpha=1,
        random_state=rng,
        positive_dict=positive_dict,
        positive_code=positive_code,
    )
    check_signs(dictionary, code)
예제 #2
0
def test_dict_learning_online_numerical_consistency(method):
    """Check that float32 and float64 inputs give consistent results."""
    rtol = 1e-4
    shared_kwargs = dict(n_components=4, alpha=1, random_state=0, method=method)

    U_64, V_64 = dict_learning_online(X.astype(np.float64), **shared_kwargs)
    U_32, V_32 = dict_learning_online(X.astype(np.float32), **shared_kwargs)

    # The optimal solution (U*, V*) is not unique: (-U*, -V*) is also
    # optimal, and so is any column-permuted U* paired with the matching
    # row-permuted V*, because the product UV is unchanged.
    # Hence compare UV, ||U||_1,1 and sum(||V_k||_2) rather than U and V
    # directly.
    product_64 = np.matmul(U_64, V_64)
    product_32 = np.matmul(U_32, V_32)
    assert_allclose(product_64, product_32, rtol=rtol)

    l1_norm_64 = np.sum(np.abs(U_64))
    l1_norm_32 = np.sum(np.abs(U_32))
    assert_allclose(l1_norm_64, l1_norm_32, rtol=rtol)

    sq_norm_64 = np.sum(V_64 ** 2)
    sq_norm_32 = np.sum(V_32 ** 2)
    assert_allclose(sq_norm_64, sq_norm_32, rtol=rtol)

    # The solution must not be degenerate (almost all zeros) ...
    nonzero_64 = U_64 != 0.0
    assert np.mean(nonzero_64) > 0.05
    # ... and both precisions must agree on the number of non-zero codes.
    assert np.count_nonzero(nonzero_64) == np.count_nonzero(U_32 != 0.0)
예제 #3
0
    def test_dict_learning_online(self):
        """Compare the ModelFrame accessor against the plain sklearn function."""
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)
        seed = self.random_state

        # Default call: a (code, dictionary) pair is returned.
        result = df.decomposition.dict_learning_online(random_state=seed)
        expected = decomposition.dict_learning_online(
            iris.data, random_state=seed)

        self.assertEqual(len(result), 2)

        code_frame = result[0]
        self.assertIsInstance(code_frame, pdml.ModelFrame)
        tm.assert_index_equal(code_frame.index, df.data.index)
        self.assert_numpy_array_almost_equal(code_frame.values, expected[0])

        dict_frame = result[1]
        self.assertIsInstance(dict_frame, pdml.ModelFrame)
        tm.assert_index_equal(dict_frame.columns, df.data.columns)
        self.assert_numpy_array_almost_equal(dict_frame.values, expected[1])

        # return_code=False: only the dictionary is returned.
        result = df.decomposition.dict_learning_online(
            return_code=False, random_state=seed)
        expected = decomposition.dict_learning_online(
            iris.data, return_code=False, random_state=seed)

        self.assertIsInstance(result, pdml.ModelFrame)
        tm.assert_index_equal(result.columns, df.data.columns)
        self.assert_numpy_array_almost_equal(result.values, expected)
예제 #4
0
    def test_dict_learning_online(self):
        """Check ``df.decomposition.dict_learning_online`` against sklearn."""
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)
        common = dict(random_state=self.random_state)

        # With the code returned (the default): expect a pair of frames.
        result = df.decomposition.dict_learning_online(**common)
        expected = decomposition.dict_learning_online(iris.data, **common)

        self.assertEqual(len(result), 2)

        # First element is the code; its index follows the data rows.
        self.assertIsInstance(result[0], pdml.ModelFrame)
        tm.assert_index_equal(result[0].index, df.data.index)
        self.assert_numpy_array_almost_equal(result[0].values, expected[0])

        # Second element is the dictionary; its columns follow the features.
        self.assertIsInstance(result[1], pdml.ModelFrame)
        tm.assert_index_equal(result[1].columns, df.data.columns)
        self.assert_numpy_array_almost_equal(result[1].values, expected[1])

        # Without the code: a single frame holding the dictionary.
        result = df.decomposition.dict_learning_online(
            return_code=False, **common)
        expected = decomposition.dict_learning_online(
            iris.data, return_code=False, **common)

        self.assertIsInstance(result, pdml.ModelFrame)
        tm.assert_index_equal(result.columns, df.data.columns)
        self.assert_numpy_array_almost_equal(result.values, expected)
예제 #5
0
def test_dict_learning_online_shapes():
    """Check output shapes with and without the code being returned."""
    rng = np.random.RandomState(0)
    n_components = 8
    # Both calls share the same RandomState instance, as in the original.
    common = dict(
        n_components=n_components,
        batch_size=4,
        max_iter=10,
        random_state=rng,
    )
    expected_dict_shape = (n_components, n_features)

    code, dictionary = dict_learning_online(X, return_code=True, **common)
    assert code.shape == (n_samples, n_components)
    assert dictionary.shape == expected_dict_shape
    assert np.dot(code, dictionary).shape == X.shape

    dictionary = dict_learning_online(X, return_code=False, **common)
    assert dictionary.shape == expected_dict_shape
def test_dict_learning_online_verbosity():
    """Run the verbose code paths with stdout captured."""
    from io import StringIO
    import sys

    n_components = 5
    verbosity_levels = (1, 2)

    saved_stdout = sys.stdout
    try:
        # Swallow everything printed by the verbose runs.
        sys.stdout = StringIO()
        for level in verbosity_levels:
            dico = MiniBatchDictionaryLearning(
                n_components, n_iter=20, verbose=level, random_state=0
            )
            dico.fit(X)
        for level in verbosity_levels:
            dict_learning_online(
                X,
                n_components=n_components,
                alpha=1,
                verbose=level,
                random_state=0,
            )
    finally:
        sys.stdout = saved_stdout

    # `dico` is the last estimator fitted above (verbose=2).
    assert dico.components_.shape == (n_components, n_features)
def test_dict_learning_online_verbosity():
    """Run the verbose code paths with stdout captured.

    ``sys.stdout`` is restored in a ``finally`` clause so that a failure in
    any of the calls does not leave stdout redirected for later tests.
    """
    n_components = 5
    # test verbosity
    from sklearn.externals.six.moves import cStringIO as StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        dico = MiniBatchDictionaryLearning(n_components,
                                           n_iter=20,
                                           verbose=1,
                                           random_state=0)
        dico.fit(X)
        dico = MiniBatchDictionaryLearning(n_components,
                                           n_iter=20,
                                           verbose=2,
                                           random_state=0)
        dico.fit(X)
        dict_learning_online(X,
                             n_components=n_components,
                             alpha=1,
                             verbose=1,
                             random_state=0)
        dict_learning_online(X,
                             n_components=n_components,
                             alpha=1,
                             verbose=2,
                             random_state=0)
    finally:
        # Always undo the redirection, even when one of the calls raises.
        sys.stdout = old_stdout
    assert_true(dico.components_.shape == (n_components, n_features))
def test_batch_size_default_value_future_warning():
    """Leaving ``batch_size`` at its default must raise a FutureWarning."""
    # FIXME: remove in 1.3
    expected_warning = "The default value of batch_size will change"

    # One trigger per API: the function and the estimator.
    triggers = (
        lambda: dict_learning_online(X, n_components=2, random_state=0),
        lambda: MiniBatchDictionaryLearning(
            n_components=2, random_state=0
        ).fit(X),
    )
    for trigger in triggers:
        with pytest.warns(FutureWarning, match=expected_warning):
            trigger()
예제 #9
0
def test_dict_learning_online_verbosity():
    """Run the verbose code paths (for coverage) with stdout captured."""
    from io import StringIO
    import sys

    n_components = 5

    # Extra keyword arguments for each estimator run, in execution order.
    estimator_variants = (
        dict(verbose=1, tol=0.1),                # convergence monitoring
        dict(verbose=1, max_no_improvement=2),   # convergence monitoring
        dict(verbose=2),                         # higher verbosity level
    )

    saved_stdout = sys.stdout
    try:
        sys.stdout = StringIO()

        for extra in estimator_variants:
            dico = MiniBatchDictionaryLearning(
                n_components,
                batch_size=4,
                max_iter=5,
                random_state=0,
                **extra,
            )
            dico.fit(X)

        # function API verbosity
        for level in (1, 2):
            dict_learning_online(
                X,
                n_components=n_components,
                batch_size=4,
                alpha=1,
                verbose=level,
                random_state=0,
            )
    finally:
        sys.stdout = saved_stdout

    # `dico` is the last estimator fitted above (verbose=2).
    assert dico.components_.shape == (n_components, n_features)
def test_dict_learning_online_deprecated_args(arg, val):
    """Passing a deprecated argument must raise the matching FutureWarning.

    ``arg`` is the keyword name and ``val`` the value passed for it.
    """
    # FIXME: remove in 1.3
    expected_warning = (
        f"'{arg}' is deprecated in version 1.1 "
        "and will be removed in version 1.3."
    )
    deprecated_kwarg = {arg: val}

    with pytest.warns(FutureWarning, match=expected_warning):
        dict_learning_online(
            X,
            n_components=2,
            batch_size=4,
            random_state=0,
            **deprecated_kwarg,
        )
예제 #11
0
def test_dict_learning_online_verbosity():
    """Run the verbose code paths with stdout captured.

    ``sys.stdout`` is restored in a ``finally`` clause so that a failure in
    any of the calls does not leave stdout redirected for later tests.
    """
    n_components = 5
    # test verbosity
    # cStringIO only exists on Python 2; fall back to io.StringIO elsewhere.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1)
        dico.fit(X)
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2)
        dico.fit(X)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=1)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=2)
    finally:
        # Always undo the redirection, even when one of the calls raises.
        sys.stdout = old_stdout
    assert_true(dico.components_.shape == (n_components, n_features))
예제 #12
0
def test_dict_learning_online_verbosity():
    """Run the verbose code paths with stdout captured.

    ``sys.stdout` is restored in a ``finally`` clause so that a failure in
    any of the calls does not leave stdout redirected for later tests.
    """
    n_components = 5
    # test verbosity
    # cStringIO only exists on Python 2; fall back to io.StringIO elsewhere.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1)
        dico.fit(X)
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2)
        dico.fit(X)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=1)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=2)
    finally:
        # Always undo the redirection, even when one of the calls raises.
        sys.stdout = old_stdout
    assert_true(dico.components_.shape == (n_components, n_features))
def test_dict_learning_online_shapes():
    """Check the shapes of the code and dictionary that are returned."""
    rng = np.random.RandomState(0)
    n_components = 8
    learned = dict_learning_online(
        X, n_components=n_components, alpha=1, random_state=rng
    )
    code, dictionary = learned
    assert_equal(code.shape, (n_samples, n_components))
    assert_equal(dictionary.shape, (n_components, n_features))
    # The product code @ dictionary must have the shape of the input.
    assert_equal(np.dot(code, dictionary).shape, X.shape)
예제 #14
0
def test_dict_learning_online_shapes():
    """Check the shapes of the code and dictionary that are returned."""
    rng = np.random.RandomState(0)
    n_components = 8
    learned = dict_learning_online(
        X, n_components=n_components, alpha=1, random_state=rng
    )
    code, dictionary = learned
    assert code.shape == (n_samples, n_components)
    assert dictionary.shape == (n_components, n_features)
    # The product code @ dictionary must have the shape of the input.
    assert np.dot(code, dictionary).shape == X.shape
예제 #15
0
def test_dict_learning_online_verbosity():
    """Run the verbose code paths with stdout captured.

    ``sys.stdout`` is restored in a ``finally`` clause so that a failure in
    any of the calls does not leave stdout redirected for later tests.
    """
    n_components = 5
    # test verbosity
    from sklearn.externals.six.moves import cStringIO as StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1,
                                           random_state=0)
        dico.fit(X)
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2,
                                           random_state=0)
        dico.fit(X)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=1,
                             random_state=0)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=2,
                             random_state=0)
    finally:
        # Always undo the redirection, even when one of the calls raises.
        sys.stdout = old_stdout
    assert_true(dico.components_.shape == (n_components, n_features))
예제 #16
0
def test_dict_learning_online_shapes():
    """Check the shapes of the code and dictionary (``n_atoms`` API)."""
    # NOTE(review): `rng` and `X` are not defined in this block; presumably
    # they are module-level fixtures -- confirm.
    n_atoms = 8
    learned = dict_learning_online(
        X, n_atoms=n_atoms, alpha=1, random_state=rng
    )
    code, dictionary = learned
    assert_equal(code.shape, (n_samples, n_atoms))
    assert_equal(dictionary.shape, (n_atoms, n_features))
    # The product code @ dictionary must have the shape of the input.
    assert_equal(np.dot(code, dictionary).shape, X.shape)
def make_dictionary(X,
                    n_components=20,
                    alpha=5.,
                    write_dir='/tmp/',
                    contrasts=[],
                    method='multitask',
                    l1_ratio=.5,
                    n_subjects=13):
    """Create dictionary + encoding.

    An initial dictionary is computed (and cached under ``write_dir``) with
    ``initial_dictionary(n_components, X)`` and saved to
    ``<write_dir>/dictionary.npz``. The encoding is then computed according
    to ``method``.

    Parameters
    ----------
    X : 2-D array
        Input data. The 'online' and 'sparse' methods work on ``X.T``; the
        'multitask' method slices the columns of ``X``.
        NOTE(review): presumably the columns are voxels stacked subject by
        subject -- confirm against the caller.
    n_components : int
        Number of dictionary components.
    alpha : float
        Regularization strength forwarded to the chosen solver.
    write_dir : str
        Directory used for the joblib cache and for ``dictionary.npz``.
    contrasts : sequence
        Stored alongside the dictionary in ``dictionary.npz``. Never mutated
        here, so the shared default list is harmless.
    method : {'online', 'sparse', 'multitask'}
        'online' refines the dictionary with ``dict_learning_online`` and
        saves it again; 'sparse' encodes with ``sparse_encode``;
        'multitask' fits a ``MultiTaskElasticNet`` per voxel.
    l1_ratio : float
        ``l1_ratio`` of the elastic net ('multitask' only).
    n_subjects : int
        Number of subjects, used to derive the voxel count ('multitask' only).

    Returns
    -------
    dictionary, components
        The (possibly refined) dictionary and the encoding.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported values.
    """
    # Fail fast: previously an unknown method ran the caching and file-write
    # side effects and then crashed with UnboundLocalError at the return.
    if method not in ('online', 'sparse', 'multitask'):
        raise ValueError(
            "Unknown method %r; expected 'online', 'sparse' or 'multitask'"
            % (method,))

    from sklearn.decomposition import dict_learning_online, sparse_encode
    from sklearn.linear_model import MultiTaskElasticNet

    mem = Memory(write_dir, verbose=0)
    dictionary = mem.cache(initial_dictionary)(n_components, X)
    dictionary_path = os.path.join(write_dir, 'dictionary.npz')
    np.savez(dictionary_path, loadings=dictionary, contrasts=contrasts)

    if method == 'online':
        components, dictionary = dict_learning_online(X.T,
                                                      n_components,
                                                      alpha=alpha,
                                                      dict_init=dictionary,
                                                      batch_size=200,
                                                      method='cd',
                                                      return_code=True,
                                                      shuffle=True,
                                                      n_jobs=1,
                                                      positive_code=True)
        # Overwrite the saved file with the refined dictionary.
        np.savez(dictionary_path, loadings=dictionary, contrasts=contrasts)
    elif method == 'sparse':
        components = sparse_encode(X.T,
                                   dictionary,
                                   alpha=alpha,
                                   max_iter=10,
                                   n_jobs=1,
                                   check_input=True,
                                   verbose=0,
                                   positive=True)
    else:  # method == 'multitask'
        # too many hard-typed parameters !!!
        n_voxels = X.shape[1] // n_subjects
        components = np.zeros((X.shape[1], n_components))
        clf = MultiTaskElasticNet(alpha=alpha, l1_ratio=l1_ratio)
        for i in range(n_voxels):
            # Columns i, i + n_voxels, ...: one column per subject.
            x = X[:, i:i + n_subjects * n_voxels:n_voxels]
            components[i: i + n_subjects * n_voxels: n_voxels] =\
                clf.fit(dictionary.T, x).coef_
    return dictionary, components
예제 #18
0
def test_dict_learning_online_dtype_match(data_type, expected_type, method):
    """Check the dtype of both outputs for an input cast to ``data_type``."""
    rng = np.random.RandomState(0)
    n_components = 8
    typed_X = X.astype(data_type)

    outputs = dict_learning_online(
        typed_X,
        n_components=n_components,
        alpha=1,
        random_state=rng,
        method=method,
    )
    code, dictionary = outputs

    assert code.dtype == expected_type
    assert dictionary.dtype == expected_type
예제 #19
0
def test_dict_learning_online_verbosity():
    """Run the verbose code paths with stdout captured."""
    from io import StringIO
    import sys

    n_components = 5
    verbosity_levels = (1, 2)

    saved_stdout = sys.stdout
    try:
        # Swallow everything printed by the verbose runs.
        sys.stdout = StringIO()
        for level in verbosity_levels:
            dico = MiniBatchDictionaryLearning(
                n_components, n_iter=20, verbose=level, random_state=0
            )
            dico.fit(X)
        for level in verbosity_levels:
            dict_learning_online(
                X,
                n_components=n_components,
                alpha=1,
                verbose=level,
                random_state=0,
            )
    finally:
        sys.stdout = saved_stdout

    # `dico` is the last estimator fitted above (verbose=2).
    assert dico.components_.shape == (n_components, n_features)
예제 #20
0
def main(output, image_files):
    """Learn one dictionary over all images and write it to ``output`` as CSV.

    Each file is passed through ``prep`` and fed to ``dict_learning_online``,
    with the dictionary from the previous image as the initial dictionary.
    """
    log.info('starting with {} image files'.format(len(image_files)))
    # Lazy: each image is prepared only when the loop reaches it.
    prepared_images = map(prep, image_files)
    log.info('starting online dictionary learning')

    learn_options = dict(
        n_components=2000,
        verbose=True,
        n_jobs=-1,
        n_iter=N_ITER,
        batch_size=BATCH_SIZE,
        return_code=False,
    )
    D = None
    for image in prepared_images:
        D = dict_learning_online(image, dict_init=D, **learn_options)

    csv_text = pd.DataFrame(D).to_csv()
    output.write(csv_text)
    log.info('done')
예제 #21
0
def main(output, image_files):
    """Learn one dictionary over all images and write it to ``output`` as CSV.

    Each file is passed through ``prep`` and fed to ``dict_learning_online``,
    with the dictionary from the previous image as the initial dictionary.
    """
    log.info('starting with {} image files'.format(len(image_files)))
    # Lazy: each image is prepared only when the loop reaches it.
    prepared_images = map(prep, image_files)
    log.info('starting online dictionary learning')

    learn_options = dict(
        n_components=2000,
        verbose=True,
        n_jobs=-1,
        n_iter=N_ITER,
        batch_size=BATCH_SIZE,
        return_code=False,
    )
    D = None
    for image in prepared_images:
        D = dict_learning_online(image, dict_init=D, **learn_options)

    csv_text = pd.DataFrame(D).to_csv()
    output.write(csv_text)
    log.info('done')
예제 #22
0
def test_dict_learning_online_positivity(positive_code,
                                         positive_dict):
    """Check the positivity constraints of ``dict_learning_online`` ("cd")."""
    rng = np.random.RandomState(0)
    n_components = 8

    code, dictionary = dict_learning_online(
        X,
        n_components=n_components,
        method="cd",
        alpha=1,
        random_state=rng,
        positive_dict=positive_dict,
        positive_code=positive_code,
    )

    # With a constraint on, every entry must be non-negative; with it off,
    # at least one negative entry is expected.
    dictionary_ok = (
        (dictionary >= 0).all() if positive_dict else (dictionary < 0).any()
    )
    assert dictionary_ok
    code_ok = (code >= 0).all() if positive_code else (code < 0).any()
    assert code_ok
예제 #23
0
파일: depr.py 프로젝트: wtak23/pytak
    def fit(self, X, y=None):
        """Fit the model from data in X.

        Runs ``dict_learning_online`` on ``X`` and stores the two returned
        values as ``self.code_`` and ``self.dict_``.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.
        y: ignored
            Not used by this method.

        Returns
        -------
        self: object
            Returns the object itself
        """
        from sklearn.decomposition import dict_learning_online

        self.code_, self.dict_ = dict_learning_online(
            X,
            n_components=self.n_components,
            alpha=self.alpha,
            n_iter=self.n_iter,
        )
        return self
예제 #24
0
def test_dict_learning_online_lars_positive_parameter():
    """``positive_code=True`` with the default method must raise ValueError."""
    expected_error = (
        "Positive constraint not supported for 'lars' coding method."
    )
    with pytest.raises(ValueError, match=expected_error):
        dict_learning_online(X, alpha=1, positive_code=True)
예제 #25
0
    shape_data = np.load(
        '/media/keyi/Data/Research/course_project/AdvancedCV_2020/data/COCO17/shape_val2017_128.npy'
    )
else:
    print('Not implemented, try n_vertices: 16, 32, 64')
    exit()
# Output locations for the learned dictionary and codes; the file names
# encode the vertex count, the number of atoms and the sparsity weight.
out_dict = '/media/keyi/Data/Research/course_project/AdvancedCV_2020/data/COCO17/shape_codes/sparsity/dict_val2017_v{}_b{}_alpha{}.npy'.format(
    n_vertices, n_atom, alpha)
out_code = '/media/keyi/Data/Research/course_project/AdvancedCV_2020/data/COCO17/shape_codes/sparsity/code_val2017_v{}_b{}_alpha{}.npy'.format(
    n_vertices, n_atom, alpha)

n_dim, n_data = shape_data.shape
print('Shape data dims: ', shape_data.shape)

# dict_learning_online receives the transposed shape_data.
shape_samples = np.transpose(shape_data)
learned_codes, learned_dict = dict_learning_online(
    shape_samples,
    n_components=n_atom,
    alpha=alpha,
    n_iter=2000,
    batch_size=50,
    return_code=True,
)

print("Learned dictionary dim: ", learned_dict.shape)
print("Learned codes dim: ", learned_codes.shape)

# Reconstruction error: sum of squared residuals divided by n_data.
residual = np.matmul(learned_codes, learned_dict) - shape_samples
error = np.sum(residual ** 2) / n_data
print('reconstruction error(frobenius): ', error)

np.save(out_dict, learned_dict)
np.save(out_code, learned_codes)
def test_dict_learning_online_lars_positive_parameter():
    """``positive_code=True`` with the default method must raise ValueError."""
    expected_error = (
        "Positive constraint not supported for 'lars' coding method."
    )
    call_kwargs = dict(batch_size=4, max_iter=10, positive_code=True)
    with pytest.raises(ValueError, match=expected_error):
        dict_learning_online(X, **call_kwargs)
def test_dict_learning_online_n_iter_deprecated():
    """Combining ``max_iter`` with a deprecated argument must raise.

    Here ``return_inner_stats=True`` is passed together with ``max_iter``.
    """
    expected_error = (
        "The following arguments are incompatible with 'max_iter'"
    )
    conflicting_kwargs = dict(max_iter=10, return_inner_stats=True)
    with pytest.raises(ValueError, match=expected_error):
        dict_learning_online(X, **conflicting_kwargs)