def create_dictionary_dl(lmbd, K=100, N=10000, dir_mnist='save_exp/mnist'):

    import os.path as osp
    fname = osp.join(dir_mnist, "D_mnist_K{}_lmbd{}.npy".format(K, lmbd))
    if osp.exists(fname):
        D = np.load(fname)
    else:
        from sklearn.decomposition import DictionaryLearning
        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
        im = mnist.train.next_batch(N)[0]
        im = im.reshape(N, 28, 28)
        im = [
            imresize(a, (17, 17), interp='bilinear', mode='L') - .5 for a in im
        ]
        X = np.array(im).reshape(N, -1)
        print(X.shape)

        dl = DictionaryLearning(K,
                                alpha=lmbd * N,
                                fit_algorithm='cd',
                                n_jobs=-1,
                                verbose=1)
        dl.fit(X)
        D = dl.components_.reshape(K, -1)
        np.save(fname, D)
    return D
Exemple #2
0
    def __init__(self,
                 num_components=10,
                 catalog_name='unknown',
                 alpha=0.001,
                 transform_alpha=0.01,
                 max_iter=2000,
                 tol=1e-9,
                 n_jobs=1,
                 verbose=True,
                 random_state=None):

        self._decomposition = 'Sparse Coding'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._alpha = alpha
        self._transform_alpha = 0.001
        self._n_jobs = n_jobs
        self._random_state = random_state

        self._DL = DictionaryLearning(n_components=self._num_components,
                                      alpha=self._alpha,
                                      transform_alpha=self._transform_alpha,
                                      n_jobs=self._n_jobs,
                                      verbose=verbose,
                                      random_state=self._random_state)
Exemple #3
0
def test_dict_learning_lassocd_readonly_data():
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dico = DictionaryLearning(n_components, transform_algorithm='lasso_cd',
                                  transform_alpha=0.001, random_state=0, n_jobs=-1)
        code = dico.fit(X_read_only).transform(X_read_only)
        assert_array_almost_equal(np.dot(code, dico.components_), X_read_only, decimal=2)
Exemple #4
0
def learn_dictionary(data, n_components):
    model = DictionaryLearning(n_components=n_components,
                               alpha=1.0,
                               max_iter=200)
    model = model.fit(data.T)
    dictionary = model.components_.T
    return dictionary
Exemple #5
0
    def _get_stain_matrix(self, input_image: np.ndarray) -> np.ndarray:
        """Compute the 2x3 stain matrix with the method from the paper

        Args:
            input_image (np.array): Image to extract the stains from

        Returns:
            np.array: Extracted stains
        """
        mask = self._notwhite_mask(input_image, threshold=self.thres).reshape(
            (-1, ))
        optical_density = self._rgb_to_od(input_image).reshape((-1, 3))
        optical_density = optical_density[mask]
        n_features = optical_density.T.shape[1]

        dict_learner = DictionaryLearning(
            n_components=2,
            alpha=self.lambda_s,
            max_iter=10,
            fit_algorithm="lars",
            transform_algorithm="lasso_lars",
            transform_n_nonzero_coefs=n_features,
            random_state=0,
            positive_dict=True,
        )
        dictionary = dict_learner.fit_transform(optical_density.T).T

        if dictionary[0, 0] < dictionary[1, 0]:
            dictionary = dictionary[[1, 0], :]
        dictionary = self._normalize_rows(dictionary)
        return dictionary
    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS[
                'n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS[
                'random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

            # initialize Dictionary Learning object with default params and weights
            self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                             alpha=1,
                                             max_iter=self.max_iter,
                                             tol=1e-08,
                                             fit_algorithm='lars',
                                             transform_algorithm='omp',
                                             transform_n_nonzero_coefs=None,
                                             transform_alpha=None,
                                             n_jobs=1,
                                             code_init=self.code_init,
                                             dict_init=self.dict_init,
                                             verbose=False,
                                             split_sign=False,
                                             random_state=self.random_state)
Exemple #7
0
def _dictionarylearning(Y, n, k, iter_=100000):
    dct = DictionaryLearning(n_components=n,
                             transform_algorithm='lars',
                             transform_n_nonzero_coefs=k,
                             max_iter=iter_)
    dct.fit(Y.T)
    A_new = dct.components_
    return A_new.T
def test_dict_learning_split():
    n_atoms = 5
    dico = DictionaryLearning(n_atoms, transform_algorithm='threshold')
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_atoms] - split_code[:, n_atoms:], code)
def test_dict_learning_split():
    n_atoms = 5
    dico = DictionaryLearning(n_atoms, transform_algorithm='threshold')
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_atoms] - split_code[:, n_atoms:], code)
Exemple #10
0
def learn_dictionary(patches, n_c=512, a=1, n_i=100, n_j=3, es=5, fit_algorithm='lars'):
    dic = DictionaryLearning(n_components=n_c, alpha=a, max_iter=n_i,
                             n_jobs=n_j, fit_algorithm=fit_algorithm)
    print ("Start learning dictionary: n_c: "+str(n_c)+", alpha: "+str(a)+", n_i: " +
           str(n_i)+", es: "+str(es)+", n_j: "+str(n_j))
    v2 = dic.fit(patches).components_
    d2 = v2.reshape(n_c, es, es, es)  # e.g. 512x5x5x5
    return d2
Exemple #11
0
def test_dict_learning_shapes():
    n_components = 5
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert dico.components_.shape == (n_components, n_features)

    n_components = 1
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert dico.components_.shape == (n_components, n_features)
    assert dico.transform(X).shape == (X.shape[0], n_components)
def test_dict_learning_shapes():
    n_components = 5
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert_equal(dico.components_.shape, (n_components, n_features))

    n_components = 1
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert_equal(dico.components_.shape, (n_components, n_features))
    assert_equal(dico.transform(X).shape, (X.shape[0], n_components))
Exemple #13
0
 def _fit(self, feature_values, n_components):
     # reshape to two dimensional vector (list of 1d features)
     feature_vector = feature_values.reshape(feature_values.shape[0], -1)
     dictionary = DictionaryLearning(n_components=n_components,
                                     alpha=1,
                                     max_iter=500,
                                     n_jobs=self.threads)
     codebook = dictionary.fit(feature_vector)
     return codebook
Exemple #14
0
def dictionaryLearningMethod(inputVec, length):
    # length = len(vectors)
    length = length
    dic = DictionaryLearning(n_components=length, alpha=0.05)
    dictionary = dic.fit(inputVec).components_
    sparseCoder = dic.fit_transform(inputVec)
    # sparseCoder = dic.fit(vectors).components_
    # dictionary = dic.fit_transform(vectors)

    return dictionary, sparseCoder
Exemple #15
0
def dictionaryLearningMethod(vectors):
    # length = len(vectors)
    length = 5
    dic = DictionaryLearning(n_components=length, alpha=1)
    dictionary = dic.fit(vectors).components_
    sparseCoder = dic.fit_transform(vectors)
    # sparseCoder = dic.fit(vectors).components_
    # dictionary = dic.fit_transform(vectors)

    return dictionary.T , sparseCoder.T
def test_dict_learning_lars_code_positivity():
    n_components = 5
    dico = DictionaryLearning(
        n_components, transform_algorithm="lars", random_state=0,
        positive_code=True, fit_algorithm="cd").fit(X)

    err_msg = "Positive constraint not supported for '{}' coding method."
    err_msg = err_msg.format("lars")
    with pytest.raises(ValueError, match=err_msg):
        dico.transform(X)
def trainLowDict(buffer):
    print('Learning the dictionary...')
    t0 = time()
    dico = DictionaryLearning(n_components=100, alpha=1, max_iter=100,verbose=1)

    V = dico.fit(buffer).components_
    E = dico.error_
    dt = time() - t0
    print('done in %.2fs.' % dt)
    return V,E
def test_dict_learning_split():
    n_components = 5
    dico = DictionaryLearning(n_components, transform_algorithm='threshold',
                              random_state=0)
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_almost_equal(split_code[:, :n_components] -
                              split_code[:, n_components:], code)
Exemple #19
0
 def sparse_code(X_mat,
                 n_comps=12,
                 alpha=1,
                 out_dir=oj(config.DIR_INTERIM, 'dictionaries')):
     print('sparse coding...')
     d = DictionaryLearning(n_components=n_comps,
                            alpha=alpha,
                            random_state=42)
     d.fit(X_mat)
     pkl.dump(d, open(oj(out_dir, f'sc_{n_comps}_alpha={alpha}.pkl'), 'wb'))
def test_dict_learning_split():
    n_components = 5
    dico = DictionaryLearning(n_components, transform_algorithm='threshold',
                              random_state=0)
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_components] -
                       split_code[:, n_components:], code)
def test_dict_learning_reconstruction():
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
def sparse_coding(dimension, input_x, alpha, iteration, tolerance):
	#dl = DictionaryLearning(dimension)
	dl = DictionaryLearning(dimension, alpha, iteration, tolerance) 
	dl.fit(input_x)
	#np.set_printoptions(precision=3, suppress=True)
	#print code
	#print dl.components_
	print "error:", dl.error_[-1]
	
	return dl
def test_dict_learning_nonzero_coefs():
    n_components = 4
    dico = DictionaryLearning(n_components, transform_algorithm='lars',
                              transform_n_nonzero_coefs=3, random_state=0)
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert_true(len(np.flatnonzero(code)) == 3)

    dico.set_params(transform_algorithm='omp')
    code = dico.transform(X[np.newaxis, 1])
    assert_equal(len(np.flatnonzero(code)), 3)
def test_dict_learning_reconstruction_parallel():
    # regression test that parallel reconstruction works with n_jobs=-1
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0, n_jobs=-1)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
Exemple #25
0
 def _estimate_linear_combination(self, imgs_vec, params):
     estimator = DictionaryLearning(n_components=params.get('nb_labels'),
                                    max_iter=params.get('max_iter'),
                                    fit_algorithm='lars',
                                    transform_algorithm='omp',
                                    split_sign=False,
                                    tol=params.get('tol'),
                                    n_jobs=1)
     fit_result = estimator.fit_transform(imgs_vec)
     components = estimator.components_
     return estimator, components, fit_result
Exemple #26
0
def dictionary_learn():
    x = [[1, -2, 3, 4, 5.], [3, 4, -5, 6, 7], [1, 7, 2, -6, 2],
         [3, 8, 6, 2, -8]]
    print(x)

    dct = DictionaryLearning(n_components=5)
    dct.fit(x)

    print(dct.components_)
    print(dct.transform(x))

    pass
Exemple #27
0
def test_DictionaryLearning(n_components):
    '''
    测试 DictionaryLearning 的用法

    :return: None
    '''
    X = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [10, 9, 8, 7, 6], [5, 4, 3, 2, 1]]
    print("before transform:", X)
    dct = DictionaryLearning(n_components=n_components)
    dct.fit(X)
    print("components is :", dct.components_)
    print("after transform:", dct.transform(X))
Exemple #28
0
def test_DictionaryLearning():
    from sklearn.decomposition import DictionaryLearning
    x = [
        [1, 2, 3, 4, 5],
        [6, 7, 8, 9, 10],
        [10, 9, 8, 7, 6],
        [5, 4, 3, 2, 1]
    ]
    print("before transform:", x)
    dct = DictionaryLearning(n_components=3)
    dct.fit(x)
    print("components is :", dct.components_)
    print("after transform:", dct.transform(x))
Exemple #29
0
 def DL(self):
     dictLearn = DictionaryLearning(n_components=self.n_components,
                                    alpha=1,
                                    transform_algorithm='omp',
                                    transform_n_nonzero_coefs=20)
     self.dictionary = []
     gamma = []
     #print self.cluster[0]
     #print self.cluster
     for i in range(self.n_clusters):
         dictObject = dictLearn.fit(self.cluster[i])
         self.dictionary.append(dictObject.components_)
         gamma.append(dictObject.transform(self.cluster[i]))
Exemple #30
0
def trainLowDict(buffer):
    print('Learning the dictionary...')
    t0 = time()
    dico = DictionaryLearning(n_components=100,
                              alpha=1,
                              max_iter=100,
                              verbose=1)

    V = dico.fit(buffer).components_
    E = dico.error_
    dt = time() - t0
    print('done in %.2fs.' % dt)
    return V, E
class _DictionaryLearningImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
def test_dict_learning_lassocd_readonly_data():
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dico = DictionaryLearning(
            n_components,
            transform_algorithm="lasso_cd",
            transform_alpha=0.001,
            random_state=0,
            n_jobs=4,
        )
        with ignore_warnings(category=ConvergenceWarning):
            code = dico.fit(X_read_only).transform(X_read_only)
        assert_array_almost_equal(np.dot(code, dico.components_),
                                  X_read_only,
                                  decimal=2)
 def Dictionary_learning(X):
     # input: X (rows are features, columns are samples)
     # Outputs: U (columns are projection directions) --> note: X = UV
     n_dimensions = X.shape[0]
     n_samples = X.shape[1]
     k = min(n_dimensions, n_samples)
     # model = DictionaryLearning(n_components=k, tol=1e-20, max_iter=int(1e4))
     model = DictionaryLearning(n_components=k)
     model.fit(X)
     V = model.components_
     U = X.dot(V.T).dot(
         np.linalg.inv(V.dot(V.T))
     )  # X=UV --> X:d*n, U:d*k, V:k*n --> XV' = UVV' --> U = XV'(VV')^{-1}
     # print(X.shape, U.shape, V.shape, k)
     return U, k
Exemple #34
0
    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS['n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS['random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

            # initialize Dictionary Learning object with default params and weights
            self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                       alpha=1,
                                       max_iter=self.max_iter,
                                       tol=1e-08,
                                       fit_algorithm='lars',
                                       transform_algorithm='omp',
                                       transform_n_nonzero_coefs=None,
                                       transform_alpha=None,
                                       n_jobs=1,
                                       code_init=self.code_init,
                                       dict_init=self.dict_init,
                                       verbose=False,
                                       split_sign=False,
                                       random_state=self.random_state)
Exemple #35
0
def get_dim_reds_scikit(pct_features):
    n_components = max(int(pct_features * num_features), 1)
    return [
        LinearDiscriminantAnalysis(n_components=n_components),
        TruncatedSVD(n_components=n_components),
        #SparseCoder(n_components=n_components),
        DictionaryLearning(n_components=n_components),
        FactorAnalysis(n_components=n_components),
        SparsePCA(n_components=n_components),
        NMF(n_components=n_components),
        PCA(n_components=n_components),
        RandomizedPCA(n_components=n_components),
        KernelPCA(kernel="linear", n_components=n_components),
        KernelPCA(kernel="poly", n_components=n_components),
        KernelPCA(kernel="rbf", n_components=n_components),
        KernelPCA(kernel="sigmoid", n_components=n_components),
        KernelPCA(kernel="cosine", n_components=n_components),
        Isomap(n_components=n_components),
        LocallyLinearEmbedding(n_components=n_components,
                               eigen_solver='auto',
                               method='standard'),
        LocallyLinearEmbedding(n_neighbors=n_components,
                               n_components=n_components,
                               eigen_solver='auto',
                               method='modified'),
        LocallyLinearEmbedding(n_neighbors=n_components,
                               n_components=n_components,
                               eigen_solver='auto',
                               method='ltsa'),
        SpectralEmbedding(n_components=n_components)
    ]
def test_dictionary_learning_dtype_match(
    data_type,
    expected_type,
    fit_algorithm,
    transform_algorithm,
):
    # Verify preserving dtype for fit and transform in dictionary learning class
    dict_learner = DictionaryLearning(
        n_components=8,
        fit_algorithm=fit_algorithm,
        transform_algorithm=transform_algorithm,
        random_state=0,
    )
    dict_learner.fit(X.astype(data_type))
    assert dict_learner.components_.dtype == expected_type
    assert dict_learner.transform(X.astype(data_type)).dtype == expected_type
Exemple #37
0
def _dictionarylearning(Y, N, k, iter_=100000):
    """
    Perform dictionary learning on the given data set.
    --------------------------------------------------
    Input:
        Y: Measurement matrix of size M x L
        N: Number of sources
        k: Number of active sources
    Output:
        D: A dictionary matrix
    """
    dct = DictionaryLearning(n_components=N, transform_algorithm='lars',
                             transform_n_nonzero_coefs=k, max_iter=iter_)
    dct.fit(Y.T)
    D = dct.components_
    return D.T
Exemple #38
0
def test_dict_learning_positivity(transform_algorithm,
                                  positive_code,
                                  positive_dict):
    n_components = 5
    dico = DictionaryLearning(
        n_components, transform_algorithm=transform_algorithm, random_state=0,
        positive_code=positive_code, positive_dict=positive_dict).fit(X)
    code = dico.transform(X)
    if positive_dict:
        assert_true((dico.components_ >= 0).all())
    else:
        assert_true((dico.components_ < 0).any())
    if positive_code:
        assert_true((code >= 0).all())
    else:
        assert_true((code < 0).any())
Exemple #39
0
def generateOptSparseDictionary(images, patch_size, num_samples, num_features):
    video_patches, _ = generateVideoPatches(patch_size, images)
    samples = samplePatches(num_samples, video_patches)
    alg = DictionaryLearning(n_components=num_features)

    # Squeeze sample patches to be array
    alg.fit(samples.reshape(np.shape(samples)[0], np.shape(samples)[1]**2))
    features = alg.components_

    filter_size = np.shape(samples)[1]

    features = (features.T / np.linalg.norm(features, axis=1).T).T
    # features = (features.T - np.mean(features,axis=1).T).T

    features = features.reshape(features.shape[0], filter_size, filter_size)

    return features
Exemple #40
0
def get_dic_per_cluster(clust_q, data_cluster, dataq, i, out_q=None, kerPCA=False):
    if out_q is not None:
        name = mpc.current_process().name
        print name, 'Starting'
    else:
        print 'Starting estimation of dic %i...' % i
    # parse the feature vectors for each cluster
    for q in clust_q:
        data_cluster = np.vstack((data_cluster, dataq[q]))
    # remove useless first line
    data_cluster = data_cluster[1:, :]
    # learn the sparse code for that cluster
    if kerPCA is False:
        dict_learn = DictionaryLearning(n_jobs=10)
        dict_learn.fit(data_cluster)
    else:
        print 'Doing kernel PCA...'
        print data_cluster.shape
        dict_learn = KernelPCA(kernel="rbf", gamma=10, n_components=3)
        #dict_learn = PCA(n_components=10)
        dict_learn.fit(data_cluster)
    if out_q is not None:
        res = {}
        res[i] = dict_learn
        out_q.put(res)
        print name, 'Exiting'
    else:
        print 'Finished.'
        return dict_learn   # dict(i = dict_learn)
def create_dictionary_dl(lmbd, K=100, N=10000, dir_mnist='save_exp/mnist'):

    import os.path as osp
    fname = osp.join(dir_mnist, "D_mnist_K{}_lmbd{}.npy".format(K, lmbd))
    if osp.exists(fname):
        D = np.load(fname)
    else:
        from sklearn.decomposition import DictionaryLearning
        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
        im = mnist.train.next_batch(N)[0]
        im = im.reshape(N, 28, 28)
        im = [imresize(a, (17, 17), interp='bilinear', mode='L')-.5
              for a in im]
        X = np.array(im).reshape(N, -1)
        print(X.shape)

        dl = DictionaryLearning(K, alpha=lmbd*N, fit_algorithm='cd',
                                n_jobs=-1, verbose=1)
        dl.fit(X)
        D = dl.components_.reshape(K, -1)
        np.save(fname, D)
    return D
Exemple #42
0
    def __init__(self, num_components=10,
                 catalog_name='unknown',
                 alpha = 0.001,
                 transform_alpha = 0.01,
                 max_iter = 2000,
                 tol = 1e-9,
                 n_jobs = 1,
                 verbose = True,
                 random_state = None):

        self._decomposition   = 'Sparse Coding'
        self._num_components  = num_components
        self._catalog_name    = catalog_name
        self._alpha           = alpha
        self._transform_alpha = 0.001
        self._n_jobs          = n_jobs
        self._random_state    = random_state

        self._DL = DictionaryLearning(n_components=self._num_components,
                              alpha           = self._alpha,
                              transform_alpha = self._transform_alpha,
                              n_jobs          = self._n_jobs,
                              verbose         = verbose,
                              random_state    = self._random_state)
Exemple #43
0
class SparseCoding:

    DEFAULT_MODEL_PARAMS = {
        'n_components' : 10,
        'n_features' : 64,
        'max_iter' : 5,
        'random_state' : 1,
        'dict_init' : None,
        'code_init' : None
    }

    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS['n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS['random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

            # initialize Dictionary Learning object with default params and weights
            self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                       alpha=1,
                                       max_iter=self.max_iter,
                                       tol=1e-08,
                                       fit_algorithm='lars',
                                       transform_algorithm='omp',
                                       transform_n_nonzero_coefs=None,
                                       transform_alpha=None,
                                       n_jobs=1,
                                       code_init=self.code_init,
                                       dict_init=self.dict_init,
                                       verbose=False,
                                       split_sign=False,
                                       random_state=self.random_state)


    def save_model(self, filename):
        # save DL object to file, compress is also to prevent multiple model files.
        joblib.dump(self.DL_obj, filename, compress=3)


    def load_model(self, filename):
        # load DL Object from file
        self.DL_obj = joblib.load(filename)

        # set certain model params as class attributes. Get values from DL Obj.get_params() or use default values.
        DL_params = self.DL_obj.get_params()
        for param in SparseCoding.DEFAULT_MODEL_PARAMS:
            if param in DL_params:
                setattr(self, param, DL_params[param])
            else:
                setattr(self, param, SparseCoding.DEFAULT_MODEL_PARAMS[param])


    def learn_dictionary(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape((whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" %whitened_patches.ndim

        # learn dictionary
        self.DL_obj.fit(whitened_patches)


    def get_dictionary(self):
        try:
            return self.DL_obj.components_
        except AttributeError:
            raise AttributeError("Feature extraction dictionary has not yet been learnt for this model. " \
                                 + "Train the feature extraction model at least once to prevent this error.")


    def get_sparse_features(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape((whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" %whitened_patches.ndim
        try:
            sparse_code = self.DL_obj.transform(whitened_patches)
        except NotFittedError:
            raise NotFittedError("Feature extraction dictionary has not yet been learnt for this model, " \
                                 + "therefore Sparse Codes cannot be extracted. Train the feature extraction model " \
                                 + "at least once to prevent this error.")
        return sparse_code


    def get_sign_split_features(self, sparse_features):
        n_samples, n_components = sparse_features.shape
        sign_split_features = np.empty((n_samples, 2 * n_components))
        sign_split_features[:, :n_components] = np.maximum(sparse_features, 0)
        sign_split_features[:, n_components:] = -np.minimum(sparse_features, 0)
        return sign_split_features


    def get_pooled_features(self, input_feature_map, filter_size=(19,19)):
        # assuming square filters and images
        filter_side = filter_size[0]

        # reshaping incoming features from 2d to 3d i.e. (3249,20) to (57,57,20)
        input_feature_map_shape = input_feature_map.shape
        if input_feature_map.ndim == 2:
            input_feature_map_side = int(np.sqrt(input_feature_map.shape[0]))
            input_feature_map = input_feature_map.reshape((input_feature_map_side, input_feature_map_side, input_feature_map_shape[-1]))
        assert input_feature_map.ndim == 3, "Input features dimension is %d instead of 3" %input_feature_map.ndim

        # get windows (57,57,20) to (3,3,1,19,19,20)
        input_feature_map_windows = view_as_windows(input_feature_map,
                                                    window_shape=(filter_size[0], filter_size[1], input_feature_map.shape[-1]),
                                                    step=filter_size[0])

        # reshape windows (3,3,1,19,19,20) to (3**2, 19**2, 20) == (9, 361, 20)
        input_feature_map_windows = input_feature_map_windows.reshape((input_feature_map_windows.shape[0]**2,
                                                                       filter_size[0]**2,
                                                                       input_feature_map.shape[-1]))

        # calculate norms (9, 361, 20) to (9,361)
        input_feature_map_window_norms = np.linalg.norm(input_feature_map_windows, ord=2, axis=-1)

        # calculate indexes of max norms per window (9,361) to (9,1). One max index per window.
        max_norm_indexes = np.argmax(input_feature_map_window_norms, axis=-1)

        # max pooled features are the features that have max norm indexes (9, 361, 20) to (9,20). One max index per window.
        pooled_features = input_feature_map_windows[np.arange(input_feature_map_windows.shape[0]), max_norm_indexes]

        # return pooled feature map
        return pooled_features


    # Combined Pipeline
    def get_pooled_features_from_whitened_patches(self, whitened_patches):
        sparse_features = self.get_sparse_features(whitened_patches)
        sign_split_features = self.get_sign_split_features(sparse_features)
        pooled_features = self.get_pooled_features(sign_split_features)
        return pooled_features
# sklearn utilities
from sklearn.decomposition import DictionaryLearning
from sklearn.preprocessing import normalize

def interface():
    args = argparse.ArgumentParser()
    # Required 
    args.add_argument('-i', '--data-matrix', help='Input data matrix', required=True)
    # Optional 
    args.add_argument('-d', '--dict-file', help='Dictionary encoder file (.pkl)', default='dict.pkl')
    args.add_argument('-n', '--num-atoms', help='Desired dictionary size', default=1000, type=int)
    args.add_argument('-a', '--alpha', help='Alpha (sparsity enforcement)', default=1.0, type=float)
    args = args.parse_args()
    return args

if __name__=="__main__":
    args = interface()

    # Load and preprocess the data
    sample_ids, matrix = parse_otu_matrix(args.data_matrix)
    matrix = normalize(matrix)

    # Learn a dictionary 
    dict_transformer = DictionaryLearning(n_components=args.num_atoms, alpha=args.alpha)
    dict_transformer.fit(matrix)

    # Save dictionary to file  
    save_object_to_file(dict_transformer, args.dict_file)

Exemple #45
0
	from sklearn.decomposition import NMF

	nmfHOG = NMF(n_components=components)
	nmfHOF = NMF(n_components=components)

	nmfHOG.fit(np.array([x['hog'] for x in features]).T)
	nmfHOF.fit(np.array([x['hof'] for x in features]).T)

	hogComponents = icaHOG.components_.T
	hofComponents = icaHOF.components_.T

	return hogComponents, hofComponents	

if 0:
	from sklearn.decomposition import DictionaryLearning
	dicHOG = DictionaryLearning(25)
	dicHOG.fit(hogs)


def displayComponents(components):
	
	sides = ceil(np.sqrt(len(components)))
	for i in range(len(components)):
		subplot(sides, sides, i+1)
		imshow(hog2image(components[i], imageSize=[24,24],orientations=4))

	sides = ceil(np.sqrt(components.shape[1]))
	for i in range(components.shape[1]):
		subplot(sides, sides, i+1)
		imshow(hog2image(components[:,i], imageSize=[24,24],orientations=4))
Exemple #46
0
class SC(object):
    """
    Wrapper for sklearn package.  Performs sparse coding

    Sparse Coding, or Dictionary Learning has 5 methods:
       - fit(waveforms)
       update class instance with Sparse Coding fit

       - fit_transform()
       do what fit() does, but additionally return the projection onto new basis space

       - inverse_transform(A)
       inverses the decomposition, returns waveforms for an input A, using Z^\dagger

       - get_basis()
       returns the basis vectors Z^\dagger

       - get_params()
       returns metadata used for fits.
    """
    def __init__(self, num_components=10,
                 catalog_name='unknown',
                 alpha = 0.001,
                 transform_alpha = 0.01,
                 max_iter = 2000,
                 tol = 1e-9,
                 n_jobs = 1,
                 verbose = True,
                 random_state = None):

        self._decomposition   = 'Sparse Coding'
        self._num_components  = num_components
        self._catalog_name    = catalog_name
        self._alpha           = alpha
        self._transform_alpha = 0.001
        self._n_jobs          = n_jobs
        self._random_state    = random_state

        self._DL = DictionaryLearning(n_components=self._num_components,
                              alpha           = self._alpha,
                              transform_alpha = self._transform_alpha,
                              n_jobs          = self._n_jobs,
                              verbose         = verbose,
                              random_state    = self._random_state)

    def fit(self,waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._DL.fit(self._waveforms)

    def fit_transform(self,waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._DL.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self,A):
        # convert basis back to waveforms using fit
        new_waveforms = self._DL.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._DL.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decompositon'] = self._decomposition
        return params

    def get_basis(self):
        """ Return the SPCA basis vectors (Z^\dagger)"""
        return self._DL.components_
audio = data["mfccs"]

image = np.zeros((1, 75 * 50))
for i in xrange(video.shape[2]):
    if i + 1 < video.shape[2]:
        image = np.vstack(
            (image, np.abs((video[:, :, i].reshape((1, 75 * 50)) - video[:, :, i + 1].reshape((1, 75 * 50)))))
        )
idx = np.random.shuffle([i for i in xrange(image[1:].shape[0])])
image = image[idx][0]
image = (image - np.min(image, axis=0)) / (np.max(image, axis=0) + 0.01)
audio = audio.T[idx, :][0]
print image.shape, audio.shape
fusion = np.hstack((image, audio))
# sparse code
video_learner = DictionaryLearning(n_components=784, alpha=0.5, max_iter=50, fit_algorithm="cd", verbose=1)
audio_learner = DictionaryLearning(n_components=10, alpha=0.5, max_iter=50, fit_algorithm="cd", verbose=1)
fusion_learner = DictionaryLearning(n_components=784, alpha=0.5, max_iter=50, fit_algorithm="cd", verbose=1)

video_learner.fit(image)
"""
# build model
face_rbm = RBM(n_components=100, verbose=2, batch_size=20, n_iter=10)
audio_rbm = RBM(n_components=100, verbose=2, batch_size=20, n_iter=10)

# fit model

face_rbm.fit(image)
audio_rbm.fit(audio)
print face_rbm.components_.shape, audio_rbm.components_.shape
Exemple #48
0
new_U.dot(new_S)
#array([-2.20719466, -3.16170819, -4.11622173])


tsvd = TruncatedSVD(2)
tsvd.fit(iris_data)
tsvd.transform(iris_data)

#One advantage of TruncatedSVD over PCA is that TruncatedSVD can operate on sparse
#matrices while PCA cannot


#Decomposition分解 to classify分类 with DictionaryLearning

from sklearn.decomposition import DictionaryLearning
dl = DictionaryLearning(3)
transformed = dl.fit_transform(iris_data[::2])
transformed[:5]
#array([[ 0. , 6.34476574, 0. ],
#[ 0. , 5.83576461, 0. ],
#[ 0. , 6.32038375, 0. ],
#[ 0. , 5.89318572, 0. ],
#[ 0. , 5.45222715, 0. ]])

#Next, let's fit (not fit_transform) the testing set:
transformed = dl.transform(iris_data[1::2])


#Putting it all together with Pipelines

#Let's briefly load the iris dataset and seed it with some missing values:
pca.fit(mov)
#%%
import cv2
comps = np.reshape(pca.components_, [n_comps, 30, 30])
for count, comp in enumerate(comps):
    pl.subplot(4, 4, count + 1)
    blur = cv2.GaussianBlur(comp.astype(np.float32), (5, 5), 0)
    blur = np.array(blur / np.max(blur) * 255, dtype=np.uint8)
    ret3, th3 = cv2.threshold(
        blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    pl.imshow((th3 * comp).T)

#%%
n_comps = 3
dl = DictionaryLearning(n_comps, alpha=1, verbose=True)
comps = dl.fit_transform(Yr.T)
comps = np.reshape(comps, [30, 30, n_comps]).transpose([2, 0, 1])
for count, comp in enumerate(comps):
    pl.subplot(4, 4, count + 1)
    pl.imshow(comp)
#%%
N_ICA_COMPS = 8
ica = FastICA(N_ICA_COMPS, max_iter=10000, tol=10e-8)
ica.fit(pca.components_)
#%
comps = np.reshape(ica.components_, [N_ICA_COMPS, 30, 30])
for count, comp in enumerate(comps):
    idx = np.argmax(np.abs(comp))
    comp = comp * np.sign(comp.flatten()[idx])
    pl.subplot(4, 4, count + 1)