Example #1
File: DC.py  Project: clouizos/AIR
def get_dic_per_cluster(clust_q, data_cluster, dataq, i, out_q=None, kerPCA=False):
    if out_q is not None:
        name = mpc.current_process().name
        print(name, 'Starting')
    else:
        print('Starting estimation of dic %i...' % i)
    # parse the feature vectors for each cluster
    for q in clust_q:
        data_cluster = np.vstack((data_cluster, dataq[q]))
    # remove useless first line
    data_cluster = data_cluster[1:, :]
    # learn the sparse code for that cluster
    if kerPCA is False:
        dict_learn = DictionaryLearning(n_jobs=10)
        dict_learn.fit(data_cluster)
    else:
        print('Doing kernel PCA...')
        print(data_cluster.shape)
        dict_learn = KernelPCA(kernel="rbf", gamma=10, n_components=3)
        #dict_learn = PCA(n_components=10)
        dict_learn.fit(data_cluster)
    if out_q is not None:
        res = {}
        res[i] = dict_learn
        out_q.put(res)
        print(name, 'Exiting')
    else:
        print('Finished.')
        return dict_learn   # dict(i = dict_learn)
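A minimal sketch of how this helper might be driven in parallel, assuming the original file's imports (numpy as np, multiprocessing as mpc, DictionaryLearning) and purely illustrative toy inputs for the clusters and dataq structures:

import multiprocessing as mpc
import numpy as np

# hypothetical toy inputs: 3 clusters of indices over 30 random 8-dimensional feature vectors
dataq = {q: np.random.randn(8) for q in range(30)}
clusters = [list(range(0, 10)), list(range(10, 20)), list(range(20, 30))]

out_q = mpc.Queue()
jobs = []
for i, clust_q in enumerate(clusters):
    seed = np.zeros((1, 8))  # dummy first row, dropped inside the worker
    p = mpc.Process(target=get_dic_per_cluster,
                    args=(clust_q, seed, dataq, i, out_q))
    jobs.append(p)
    p.start()

dics = {}
for _ in jobs:
    dics.update(out_q.get())  # one {i: fitted_model} dict per worker
for p in jobs:
    p.join()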
Example #2
def sparse_coding(dimension, input_x, alpha, iteration, tolerance):
    # dl = DictionaryLearning(dimension)
    # positional args map to n_components, alpha, max_iter, tol
    dl = DictionaryLearning(dimension, alpha, iteration, tolerance)
    dl.fit(input_x)
    # np.set_printoptions(precision=3, suppress=True)
    # print(code)
    # print(dl.components_)
    print("error:", dl.error_[-1])

    return dl
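A short usage sketch on random data; the argument values here are purely illustrative:

import numpy as np

X_demo = np.random.randn(50, 20)
dl = sparse_coding(dimension=10, input_x=X_demo, alpha=1.0,
                   iteration=100, tolerance=1e-6)
code = dl.transform(X_demo)  # sparse codes, shape (50, 10)
print(code.shape, dl.components_.shape)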
Example #3
def test_dict_learning_lassocd_readonly_data():
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dico = DictionaryLearning(n_components, transform_algorithm='lasso_cd',
                                  transform_alpha=0.001, random_state=0, n_jobs=-1)
        code = dico.fit(X_read_only).transform(X_read_only)
        assert_array_almost_equal(np.dot(code, dico.components_), X_read_only, decimal=2)
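The test functions in this and the following examples come from scikit-learn's own test suite and rely on a small module-level data matrix X (plus helpers such as TempMemmap and the assert_* utilities). A plausible stand-in for X, if you want to run them in isolation, is a small dense random matrix:

import numpy as np

rng = np.random.RandomState(0)
n_samples, n_features = 10, 8
X = rng.randn(n_samples, n_features)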
def test_dict_learning_split():
    n_atoms = 5
    dico = DictionaryLearning(n_atoms, transform_algorithm='threshold')
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_atoms] - split_code[:, n_atoms:], code)
Example #5
def trainLowDict(buffer):
    print('Learning the dictionary...')
    t0 = time()
    dico = DictionaryLearning(n_components=100, alpha=1, max_iter=100,verbose=1)

    V = dico.fit(buffer).components_
    E = dico.error_
    dt = time() - t0
    print('done in %.2fs.' % dt)
    return V,E
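A possible call, assuming buffer is a 2-D array of flattened patches; the patch data below is random and purely illustrative:

import numpy as np
from time import time
from sklearn.decomposition import DictionaryLearning

patches = np.random.randn(500, 64)  # e.g. 500 flattened 8x8 patches
V, E = trainLowDict(patches)
print(V.shape)  # (100, 64): one dictionary atom per row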
def test_dict_learning_split():
    n_components = 5
    dico = DictionaryLearning(n_components, transform_algorithm='threshold',
                              random_state=0)
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_components] -
                       split_code[:, n_components:], code)
def test_dict_learning_reconstruction():
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
def test_dict_learning_nonzero_coefs():
    n_components = 4
    dico = DictionaryLearning(n_components, transform_algorithm='lars',
                              transform_n_nonzero_coefs=3, random_state=0)
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert_true(len(np.flatnonzero(code)) == 3)

    dico.set_params(transform_algorithm='omp')
    code = dico.transform(X[np.newaxis, 1])
    assert_equal(len(np.flatnonzero(code)), 3)
def test_dict_learning_reconstruction_parallel():
    # regression test that parallel reconstruction works with n_jobs=-1
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0, n_jobs=-1)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
def create_dictionary_dl(lmbd, K=100, N=10000, dir_mnist='save_exp/mnist'):

    import os.path as osp
    fname = osp.join(dir_mnist, "D_mnist_K{}_lmbd{}.npy".format(K, lmbd))
    if osp.exists(fname):
        D = np.load(fname)
    else:
        from sklearn.decomposition import DictionaryLearning
        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
        im = mnist.train.next_batch(N)[0]
        im = im.reshape(N, 28, 28)
        im = [imresize(a, (17, 17), interp='bilinear', mode='L')-.5
              for a in im]
        X = np.array(im).reshape(N, -1)
        print(X.shape)

        dl = DictionaryLearning(K, alpha=lmbd*N, fit_algorithm='cd',
                                n_jobs=-1, verbose=1)
        dl.fit(X)
        D = dl.components_.reshape(K, -1)
        np.save(fname, D)
    return D
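A sketch of the intended call, assuming the TensorFlow MNIST input_data helper and scipy's imresize used inside the function are importable; the function caches the learned dictionary on disk, so the target directory must exist:

import os

os.makedirs('save_exp/mnist', exist_ok=True)
D = create_dictionary_dl(lmbd=0.1, K=100, N=10000)
print(D.shape)  # (100, 289): K atoms over 17x17 flattened patches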
Example #11
def test_dict_learning_nonzero_coefs():
    n_components = 4
    dico = DictionaryLearning(
        n_components,
        transform_algorithm="lars",
        transform_n_nonzero_coefs=3,
        random_state=0,
    )
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert len(np.flatnonzero(code)) == 3

    dico.set_params(transform_algorithm="omp")
    code = dico.transform(X[np.newaxis, 1])
    assert len(np.flatnonzero(code)) == 3
Example #12
def test_dict_learning_reconstruction_parallel():
    # regression test that parallel reconstruction works with n_jobs>1
    n_components = 12
    dico = DictionaryLearning(n_components,
                              transform_algorithm='omp',
                              transform_alpha=0.001,
                              random_state=0,
                              n_jobs=4)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
def create_dictionary_dl(lmbd, K=100, N=10000, dir_mnist='save_exp/mnist'):

    import os.path as osp
    fname = osp.join(dir_mnist, "D_mnist_K{}_lmbd{}.npy".format(K, lmbd))
    if osp.exists(fname):
        D = np.load(fname)
    else:
        from sklearn.decomposition import DictionaryLearning
        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
        im = mnist.train.next_batch(N)[0]
        im = im.reshape(N, 28, 28)
        # im = [imresize(a, (17, 17), interp='bilinear', mode='L')-.5
              # for a in im]
        X = np.array(im).reshape(N, -1)
        # model = ResNet50(weights='imagenet',include_top=False)
        # X = feat_extract(model,im)
        print(X.shape)

        dl = DictionaryLearning(K, alpha=lmbd*N, fit_algorithm='cd',
                                n_jobs=-1, verbose=1)
        dl.fit(X)
        D = dl.components_.reshape(K, -1)
        np.save(fname, D)
    return D
Example #14
def test_dict_learning_lassocd_readonly_data():
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dico = DictionaryLearning(
            n_components,
            transform_algorithm="lasso_cd",
            transform_alpha=0.001,
            random_state=0,
            n_jobs=4,
        )
        with ignore_warnings(category=ConvergenceWarning):
            code = dico.fit(X_read_only).transform(X_read_only)
        assert_array_almost_equal(np.dot(code, dico.components_),
                                  X_read_only,
                                  decimal=2)
Example #15
    def peakmem_fit(self, params):
        estimator = DictionaryLearning(**self.dl_params)
        estimator.fit(self.data)
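This appears to be an asv-style memory benchmark method; a minimal surrounding class might look like the following sketch, where the setup values for dl_params and data are assumptions rather than the original benchmark's settings:

import numpy as np
from sklearn.decomposition import DictionaryLearning

class DictionaryLearningBenchmark:
    def setup(self, params):
        # illustrative parameters and data; the real benchmark defines its own
        self.dl_params = dict(n_components=15, alpha=1, max_iter=10, random_state=0)
        self.data = np.random.randn(100, 64)

    def peakmem_fit(self, params):
        estimator = DictionaryLearning(**self.dl_params)
        estimator.fit(self.data)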
Example #16
    func_filename = source + str('bold.nii.gz')
    fmri_masked = nifti_masker.fit_transform(func_filename)
    fmri_masked = fmri_masked[condition_mask]

    fmri_masked = fmri_masked[:, np.all(fmri_masked != 0, axis=0)]

    # DEFINING features and targets
    features = fmri_masked
    targets = target_int

    # Dictionary Learning on Target
    dict_sparse = DictionaryLearning(alpha=1,
                                     n_components=sparse_components,
                                     max_iter=3,
                                     verbose=3)
    dict_sparse.fit(features)
    Dt_0 = dict_sparse.components_
    coder = SparseCoder(dictionary=Dt_0)
    Rt_0 = coder.transform(features)

    target_folder = 'C:\\Users\\Pouya\\Documents\\MATLAB\\transfer\\' + str(
        subjects[subs]) + '_brain_sparse.mat'
    sio.savemat(target_folder, {'Rt_0': Rt_0, 'targets': targets})

##
target_folder = 'C:\\Users\\Pouya\\Documents\\MATLAB\\transfer\\imagenet_fc7_pca200.mat'
sio.savemat(
    target_folder, {
        'imagenet_feat': imagenet_features,
        'pca_feat': pca_feat,
        'imagenet_targets': imagenet_targets
Example #17
class SparseCoding:

    DEFAULT_MODEL_PARAMS = {
        'n_components': 10,
        'n_features': 64,
        'max_iter': 5,
        'random_state': 1,
        'dict_init': None,
        'code_init': None
    }

    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS[
                'n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS[
                'random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

            # initialize Dictionary Learning object with default params and weights
            self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                             alpha=1,
                                             max_iter=self.max_iter,
                                             tol=1e-08,
                                             fit_algorithm='lars',
                                             transform_algorithm='omp',
                                             transform_n_nonzero_coefs=None,
                                             transform_alpha=None,
                                             n_jobs=1,
                                             code_init=self.code_init,
                                             dict_init=self.dict_init,
                                             verbose=False,
                                             split_sign=False,
                                             random_state=self.random_state)

    def save_model(self, filename):
        # save DL object to file; compress to avoid writing multiple model files.
        joblib.dump(self.DL_obj, filename, compress=3)

    def load_model(self, filename):
        # load DL Object from file
        self.DL_obj = joblib.load(filename)

        # set certain model params as class attributes. Get values from DL Obj.get_params() or use default values.
        DL_params = self.DL_obj.get_params()
        for param in SparseCoding.DEFAULT_MODEL_PARAMS:
            if param in DL_params:
                setattr(self, param, DL_params[param])
            else:
                setattr(self, param, SparseCoding.DEFAULT_MODEL_PARAMS[param])

    def learn_dictionary(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape(
                (whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim

        # learn dictionary
        self.DL_obj.fit(whitened_patches)

    def get_dictionary(self):
        try:
            return self.DL_obj.components_
        except AttributeError:
            raise AttributeError("Feature extraction dictionary has not yet been learnt for this model. " \
                                 + "Train the feature extraction model at least once to prevent this error.")

    def get_sparse_features(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape(
                (whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim
        try:
            sparse_code = self.DL_obj.transform(whitened_patches)
        except NotFittedError:
            raise NotFittedError("Feature extraction dictionary has not yet been learnt for this model, " \
                                 + "therefore Sparse Codes cannot be extracted. Train the feature extraction model " \
                                 + "at least once to prevent this error.")
        return sparse_code

    def get_sign_split_features(self, sparse_features):
        n_samples, n_components = sparse_features.shape
        sign_split_features = np.empty((n_samples, 2 * n_components))
        sign_split_features[:, :n_components] = np.maximum(sparse_features, 0)
        sign_split_features[:, n_components:] = -np.minimum(sparse_features, 0)
        return sign_split_features

    def get_pooled_features(self, input_feature_map, filter_size=(19, 19)):
        # assuming square filters and images
        filter_side = filter_size[0]

        # reshaping incoming features from 2d to 3d i.e. (3249,20) to (57,57,20)
        input_feature_map_shape = input_feature_map.shape
        if input_feature_map.ndim == 2:
            input_feature_map_side = int(np.sqrt(input_feature_map.shape[0]))
            input_feature_map = input_feature_map.reshape(
                (input_feature_map_side, input_feature_map_side,
                 input_feature_map_shape[-1]))
        assert input_feature_map.ndim == 3, "Input features dimension is %d instead of 3" % input_feature_map.ndim

        # get windows (57,57,20) to (3,3,1,19,19,20)
        input_feature_map_windows = view_as_windows(
            input_feature_map,
            window_shape=(filter_size[0], filter_size[1],
                          input_feature_map.shape[-1]),
            step=filter_size[0])

        # reshape windows (3,3,1,19,19,20) to (3**2, 19**2, 20) == (9, 361, 20)
        input_feature_map_windows = input_feature_map_windows.reshape(
            (input_feature_map_windows.shape[0]**2, filter_size[0]**2,
             input_feature_map.shape[-1]))

        # calculate norms (9, 361, 20) to (9,361)
        input_feature_map_window_norms = np.linalg.norm(
            input_feature_map_windows, ord=2, axis=-1)

        # calculate indexes of max norms per window (9,361) to (9,1). One max index per window.
        max_norm_indexes = np.argmax(input_feature_map_window_norms, axis=-1)

        # max pooled features are the features that have max norm indexes (9, 361, 20) to (9,20). One max index per window.
        pooled_features = input_feature_map_windows[
            np.arange(input_feature_map_windows.shape[0]), max_norm_indexes]

        # return pooled feature map
        return pooled_features

    # Combined Pipeline
    def get_pooled_features_from_whitened_patches(self, whitened_patches):
        sparse_features = self.get_sparse_features(whitened_patches)
        sign_split_features = self.get_sign_split_features(sparse_features)
        pooled_features = self.get_pooled_features(sign_split_features)
        return pooled_features
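A usage sketch of the class above on synthetic whitened patches, assuming the original file's imports (numpy as np, DictionaryLearning, joblib, NotFittedError, view_as_windows). The pooling step additionally expects a square spatial layout such as 3249 = 57x57 patches, so only the sparse-coding and sign-split steps are exercised here:

import numpy as np

sc = SparseCoding()
patches = np.random.randn(200, 8, 8)  # 200 whitened 8x8 patches
sc.learn_dictionary(patches)  # fits the internal DictionaryLearning

codes = sc.get_sparse_features(patches)  # (200, 10) sparse codes
split = sc.get_sign_split_features(codes)  # (200, 20) non-negative features
print(codes.shape, split.shape)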
# sklearn utilities
from sklearn.decomposition import DictionaryLearning
from sklearn.preprocessing import normalize

def interface():
    args = argparse.ArgumentParser()
    # Required 
    args.add_argument('-i', '--data-matrix', help='Input data matrix', required=True)
    # Optional 
    args.add_argument('-d', '--dict-file', help='Dictionary encoder file (.pkl)', default='dict.pkl')
    args.add_argument('-n', '--num-atoms', help='Desired dictionary size', default=1000, type=int)
    args.add_argument('-a', '--alpha', help='Alpha (sparsity enforcement)', default=1.0, type=float)
    args = args.parse_args()
    return args

if __name__=="__main__":
    args = interface()

    # Load and preprocess the data
    sample_ids, matrix = parse_otu_matrix(args.data_matrix)
    matrix = normalize(matrix)

    # Learn a dictionary 
    dict_transformer = DictionaryLearning(n_components=args.num_atoms, alpha=args.alpha)
    dict_transformer.fit(matrix)

    # Save dictionary to file  
    save_object_to_file(dict_transformer, args.dict_file)
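A sketch of how the saved dictionary might be reused afterwards, assuming save_object_to_file wrote an ordinary pickle and that new samples live in the same feature space as the training matrix; the file name is the script's default:

import pickle
import numpy as np
from sklearn.preprocessing import normalize

with open('dict.pkl', 'rb') as fh:
    dict_transformer = pickle.load(fh)

new_matrix = normalize(np.random.rand(5, dict_transformer.components_.shape[1]))
codes = dict_transformer.transform(new_matrix)  # sparse codes over the learned atoms
print(codes.shape)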

Example #19
from sklearn.decomposition import SparsePCA
from sklearn.decomposition import MiniBatchSparsePCA
from sklearn.decomposition import MiniBatchDictionaryLearning

##
source_folder = 'C:\\Users\\Pouya\\Documents\\MATLAB\\DECAF\\Analysis\\Movie_Genre_adaptation\\feats.mat'
dict = sio.loadmat(source_folder)
features = dict['features']
MovieFeatures = dict['MovieFeatures']

# Source Domain
dict_sparse = DictionaryLearning(alpha=1,
                                 n_components=4,
                                 max_iter=1000,
                                 verbose=3)
dict_sparse.fit(MovieFeatures)
Ds_0 = dict_sparse.components_
coder = SparseCoder(dictionary=Ds_0)
Rs_0 = coder.transform(MovieFeatures)

# Target Domain
dict_feat = [None] * 30
for subs in range(30):
    print(subs)

    feat = features[0, subs]

    #dict_sparse = DictionaryLearning(alpha=0.1, n_components=105, max_iter=10, transform_n_nonzero_coefs=105, verbose=3)
    #dict_sparse = SparsePCA(n_components=105, max_iter=3)
    #dict_sparse = MiniBatchDictionaryLearning(alpha=1, n_components=105, batch_size=10, n_iter=100)
    #dict_sparse.fit(feat)
Example #20
    if i + 1 < video.shape[2]:
        image = np.vstack(
            (image, np.abs((video[:, :, i].reshape((1, 75 * 50)) - video[:, :, i + 1].reshape((1, 75 * 50)))))
        )
# np.random.shuffle returns None, so the original `idx` was always None; a
# permutation of the row indices (dropping the first row) is the apparent intent
idx = np.random.permutation(image[1:].shape[0])
image = image[1:][idx]
image = (image - np.min(image, axis=0)) / (np.max(image, axis=0) + 0.01)
audio = audio.T[idx, :]
print(image.shape, audio.shape)
fusion = np.hstack((image, audio))
# sparse code
video_learner = DictionaryLearning(n_components=784, alpha=0.5, max_iter=50, fit_algorithm="cd", verbose=1)
audio_learner = DictionaryLearning(n_components=10, alpha=0.5, max_iter=50, fit_algorithm="cd", verbose=1)
fusion_learner = DictionaryLearning(n_components=784, alpha=0.5, max_iter=50, fit_algorithm="cd", verbose=1)

video_learner.fit(image)
"""
# build model
face_rbm = RBM(n_components=100, verbose=2, batch_size=20, n_iter=10)
audio_rbm = RBM(n_components=100, verbose=2, batch_size=20, n_iter=10)

# fit model

face_rbm.fit(image)
audio_rbm.fit(audio)
print face_rbm.components_.shape, audio_rbm.components_.shape

hidden = np.hstack((face_rbm.components_, audio_rbm.components_))
print hidden.shape

fusion_rbm = RBM(n_components=100,verbose=2, batch_size=20, n_iter=10)
Example #21
    nmfHOG = NMF(n_components=components)
    nmfHOF = NMF(n_components=components)

    nmfHOG.fit(np.array([x['hog'] for x in features]).T)
    nmfHOF.fit(np.array([x['hof'] for x in features]).T)

    hogComponents = nmfHOG.components_.T
    hofComponents = nmfHOF.components_.T

    return hogComponents, hofComponents


if 0:
    from sklearn.decomposition import DictionaryLearning
    dicHOG = DictionaryLearning(25)
    dicHOG.fit(hogs)


def displayComponents(components):

    sides = ceil(np.sqrt(len(components)))
    for i in range(len(components)):
        subplot(sides, sides, i + 1)
        imshow(hog2image(components[i], imageSize=[24, 24], orientations=4))

    sides = ceil(np.sqrt(components.shape[1]))
    for i in range(components.shape[1]):
        subplot(sides, sides, i + 1)
        imshow(hog2image(components[:, i], imageSize=[24, 24], orientations=4))
Example #22
x_train = x_train.reshape(-1, img_rows*img_cols*channels) # each image as vector

np.random.shuffle(x_train)
print(x_train.shape)

#dictionary file name
if use_fashion:
  file_name = 'dictionary_fashion_mnist_undercomplete'
else:
  file_name = 'dictionary_mnist_overcomplete'
#check if dictionary exists
if not path.exists(file_name):
  d=DictionaryLearning(n_components=2*784, max_iter=20)
  # train dictionary
  d.fit(x_train[1:10000, :])
  dictionary = d.components_
  print(dictionary.shape)

  with open(file_name, 'wb') as output:
    pickle.dump(d, output, pickle.HIGHEST_PROTOCOL)
  print("created new dictionary")

else:
  with open(file_name, 'rb') as input:
    d = pickle.load(input)
  print("loaded dictionary")
  sparse_dict = np.transpose(d.components_)
  print("analyse pursuit")

  num_images_to_pursuit = 10
Example #23
                # Use pretrained model
                dico = pickle.load(
                    open(
                        f'{cfg.save_path}/all_{n_components}_{n_iter}.sklearnmodel',
                        'rb'))
                print(
                    f'Use hitted {cfg.save_path}/all_{n_components}_{n_iter}.sklearnmodel'
                )
                hit = True
            else:
                # Train a new model
                dico = DictionaryLearning(n_components=n_components,
                                          n_jobs=-24,
                                          max_iter=n_iter,
                                          verbose=True)
                dico.fit(images)
                n_iter_actual = dico.n_iter_
                print(f'{n_iter_actual} iters')

            timer.stop(start=' ')

            # Save the model
            if cfg.save:
                np.save(f'{cfg.save_path}/all_{n_components}_{n_iter_actual}',
                        dico.components_)
                pickle.dump(
                    dico,
                    open(
                        f'{cfg.save_path}/all_{n_components}_{n_iter_actual}.sklearnmodel',
                        'wb'))
# For pickling
def save_object(obj, filename):
    with open(filename, 'wb') as output:  # Overwrites any existing file.
        pk.dump(obj, output, pk.HIGHEST_PROTOCOL)


# Load sparse data
sparse_fit1 = pk.load(open("sparse_fit1.pkl", 'rb'))
sparse_fit2 = pk.load(open("sparse_fit2.pkl", 'rb'))
sparse_fit3 = pk.load(open("sparse_fit3.pkl", 'rb'))
sparse_fit4 = pk.load(open("sparse_fit4.pkl", 'rb'))
sparse_fit1 = np.concatenate((sparse_fit1, sparse_fit2))
print(sparse_fit1.shape)
sparse_fit2 = np.concatenate((sparse_fit3, sparse_fit4))
print(sparse_fit2.shape)
sparse_fit = np.concatenate((sparse_fit1, sparse_fit2))
print(sparse_fit.shape)
X = sparse_fit[:59478, :]
print(X.shape)

# Uses the dictionary learning class to transform the data

atoms = DictionaryLearning(100, 1, 1000, 1e-8, 'lars', 'lasso_lars')  # n_components, alpha, max_iter, tol, fit_algorithm, transform_algorithm

#fit and transform data
atoms.fit(X)

print(atoms.components_.shape)
# Pickle atoms
save_object(atoms, 'atoms.pkl')
Example #25
class SC(object):
    """
    Wrapper for sklearn package.  Performs sparse coding

    Sparse Coding, or Dictionary Learning has 5 methods:
       - fit(waveforms)
       update class instance with Sparse Coding fit

       - fit_transform()
       do what fit() does, but additionally return the projection onto new basis space

       - inverse_transform(A)
       inverts the decomposition, returning waveforms for an input A, using Z^\dagger

       - get_basis()
       returns the basis vectors Z^\dagger

       - get_params()
       returns metadata used for fits.
    """
    def __init__(self, num_components=10,
                 catalog_name='unknown',
                 alpha = 0.001,
                 transform_alpha = 0.01,
                 max_iter = 2000,
                 tol = 1e-9,
                 n_jobs = 1,
                 verbose = True,
                 random_state = None):

        self._decomposition   = 'Sparse Coding'
        self._num_components  = num_components
        self._catalog_name    = catalog_name
        self._alpha           = alpha
        self._transform_alpha = transform_alpha
        self._n_jobs          = n_jobs
        self._random_state    = random_state

        self._DL = DictionaryLearning(n_components=self._num_components,
                              alpha           = self._alpha,
                              transform_alpha = self._transform_alpha,
                              n_jobs          = self._n_jobs,
                              verbose         = verbose,
                              random_state    = self._random_state)

    def fit(self,waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._DL.fit(self._waveforms)

    def fit_transform(self,waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._DL.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self,A):
        # convert basis back to waveforms using fit
        new_waveforms = self._DL.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._DL.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decomposition'] = self._decomposition
        return params

    def get_basis(self):
        """ Return the SPCA basis vectors (Z^\dagger)"""
        return self._DL.components_
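A brief usage sketch of the wrapper on synthetic waveforms; the dimensions are arbitrary and the reconstruction is done manually from the codes and atoms rather than through inverse_transform:

import numpy as np

waveforms = np.random.randn(40, 256)  # 40 waveforms, 256 samples each
sc = SC(num_components=10)
A = sc.fit_transform(waveforms)  # projection onto the learned basis, shape (40, 10)
Z = sc.get_basis()  # dictionary atoms, shape (10, 256)
recon = A @ Z  # approximate waveforms, shape (40, 256)
print(A.shape, Z.shape, recon.shape)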
Example #26
class SparseCoding:

    DEFAULT_MODEL_PARAMS = {
        'n_components' : 10,
        'n_features' : 64,
        'max_iter' : 5,
        'random_state' : 1,
        'dict_init' : None,
        'code_init' : None
    }

    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS['n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS['random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

            # initialize Dictionary Learning object with default params and weights
            self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                       alpha=1,
                                       max_iter=self.max_iter,
                                       tol=1e-08,
                                       fit_algorithm='lars',
                                       transform_algorithm='omp',
                                       transform_n_nonzero_coefs=None,
                                       transform_alpha=None,
                                       n_jobs=1,
                                       code_init=self.code_init,
                                       dict_init=self.dict_init,
                                       verbose=False,
                                       split_sign=False,
                                       random_state=self.random_state)


    def save_model(self, filename):
        # save DL object to file; compress to avoid writing multiple model files.
        joblib.dump(self.DL_obj, filename, compress=3)


    def load_model(self, filename):
        # load DL Object from file
        self.DL_obj = joblib.load(filename)

        # set certain model params as class attributes. Get values from DL Obj.get_params() or use default values.
        DL_params = self.DL_obj.get_params()
        for param in SparseCoding.DEFAULT_MODEL_PARAMS:
            if param in DL_params:
                setattr(self, param, DL_params[param])
            else:
                setattr(self, param, SparseCoding.DEFAULT_MODEL_PARAMS[param])


    def learn_dictionary(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape((whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" %whitened_patches.ndim

        # learn dictionary
        self.DL_obj.fit(whitened_patches)


    def get_dictionary(self):
        try:
            return self.DL_obj.components_
        except AttributeError:
            raise AttributeError("Feature extraction dictionary has not yet been learnt for this model. " \
                                 + "Train the feature extraction model at least once to prevent this error.")


    def get_sparse_features(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape((whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" %whitened_patches.ndim
        try:
            sparse_code = self.DL_obj.transform(whitened_patches)
        except NotFittedError:
            raise NotFittedError("Feature extraction dictionary has not yet been learnt for this model, " \
                                 + "therefore Sparse Codes cannot be extracted. Train the feature extraction model " \
                                 + "at least once to prevent this error.")
        return sparse_code


    def get_sign_split_features(self, sparse_features):
        n_samples, n_components = sparse_features.shape
        sign_split_features = np.empty((n_samples, 2 * n_components))
        sign_split_features[:, :n_components] = np.maximum(sparse_features, 0)
        sign_split_features[:, n_components:] = -np.minimum(sparse_features, 0)
        return sign_split_features


    def get_pooled_features(self, input_feature_map, filter_size=(19,19)):
        # assuming square filters and images
        filter_side = filter_size[0]

        # reshaping incoming features from 2d to 3d i.e. (3249,20) to (57,57,20)
        input_feature_map_shape = input_feature_map.shape
        if input_feature_map.ndim == 2:
            input_feature_map_side = int(np.sqrt(input_feature_map.shape[0]))
            input_feature_map = input_feature_map.reshape((input_feature_map_side, input_feature_map_side, input_feature_map_shape[-1]))
        assert input_feature_map.ndim == 3, "Input features dimension is %d instead of 3" %input_feature_map.ndim

        # get windows (57,57,20) to (3,3,1,19,19,20)
        input_feature_map_windows = view_as_windows(input_feature_map,
                                                    window_shape=(filter_size[0], filter_size[1], input_feature_map.shape[-1]),
                                                    step=filter_size[0])

        # reshape windows (3,3,1,19,19,20) to (3**2, 19**2, 20) == (9, 361, 20)
        input_feature_map_windows = input_feature_map_windows.reshape((input_feature_map_windows.shape[0]**2,
                                                                       filter_size[0]**2,
                                                                       input_feature_map.shape[-1]))

        # calculate norms (9, 361, 20) to (9,361)
        input_feature_map_window_norms = np.linalg.norm(input_feature_map_windows, ord=2, axis=-1)

        # calculate indexes of max norms per window (9,361) to (9,1). One max index per window.
        max_norm_indexes = np.argmax(input_feature_map_window_norms, axis=-1)

        # max pooled features are the features that have max norm indexes (9, 361, 20) to (9,20). One max index per window.
        pooled_features = input_feature_map_windows[np.arange(input_feature_map_windows.shape[0]), max_norm_indexes]

        # return pooled feature map
        return pooled_features


    # Combined Pipeline
    def get_pooled_features_from_whitened_patches(self, whitened_patches):
        sparse_features = self.get_sparse_features(whitened_patches)
        sign_split_features = self.get_sign_split_features(sparse_features)
        pooled_features = self.get_pooled_features(sign_split_features)
        return pooled_features
Example #27
from sklearn.datasets import load_digits
from sklearn import model_selection
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

def test_Pipeline(data):
    X_train,X_test,y_train,y_test = data
    steps = [('Linear_SVM',LinearSVC(C=1,penalty='l1',dual=False))]
    pipeline = Pipeline(steps)
    pipeline.fit(X_train,y_train)
    print('name steps : \n',pipeline.named_steps)
    print('Pipeline score : \n',pipeline.score(X_test,y_test))

data = load_digits()
X = data.data
y = data.target
test_Pipeline(model_selection.train_test_split(X,y,test_size=0.25,stratify=y))

# Dictionary learning
from sklearn.decomposition import DictionaryLearning
X= [[1,2,3,4,5],
    [6,7,8,9,10],
    [10,9,8,7,6],
    [5,4,3,2,1]]
dct = DictionaryLearning(n_components=3)
dct.fit(X)
dct.transform(X)





Example #28
class SparseCoding(object):
    def __init__(self, n, transform_algorithm='lars'):
        self.n = n
        self.net = DictionaryLearning(n_components=n, alpha=0.8, max_iter=1000)
        self.net.set_params(transform_algorithm=transform_algorithm)

    def plot_B(self, B):
        plt.figure(figsize=(4.2, 4))
        for i, comp in enumerate(B[:self.n]):
            plt.subplot(10, 10, i + 1)
            plt.imshow(comp, cmap=plt.cm.gray_r, interpolation='nearest')
            plt.xticks(())
            plt.yticks(())

        plt.suptitle('Dictionary learned from time series\n' +
                     'Train time %.1fs on %d patches' % (dt, len(data)),
                     fontsize=16)

        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def _init(self):
        a = np.random.random((self.n, self.m))
        b = np.random.random((self.T, self.n))
        b /= sum(b)
        return a, b

    def init_weights(self, X_mat):
        B, A, recon = [], [], []
        for app in X_mat:
            data = X_mat[app].reshape(1, -1)
            B_i = self.net.fit(data).components_
            A_i = self.net.transform(data)
            X_hat = np.dot(A_i, B_i)

            B.append(B_i)
            A.append(A_i)
            recon.append(X_hat)

            print("MSE Error: ", np.mean((data - X_hat)**2))

        return A, B, recon

    def DiscriminativeDisaggregation(self, appliances, B, A):

        x = np.array([appliances[app] for app in appliances])
        x = x.T

        A_star = np.vstack(A)
        B_cat = np.hstack(B)
        change = 1
        t = 0

        print(A_star.shape)
        print(B_cat.shape)

        while t <= self.steps and self.epsilon <= change:
            B_cat_p = B_cat
            acts = self.F(x, B_cat, A=A_star)
            B_cat = (B_cat - self.alpha *
                     ((x - B_cat.dot(acts)).dot(acts.T) -
                      (x - B_cat.dot(A_star)).dot(A_star.T)))
            B_cat = self._pos_constraint(B_cat)
            B_cat /= sum(B_cat)

            t += 1
            change = np.linalg.norm(B_cat - B_cat_p)
            print("Change is {} and step is {} ".format(change, t))

        return B_cat

    def F(self, x, B, x_train=None, A=None, rp_tep=False, rp_gl=False):
        B = np.asarray(B)
        A = np.asarray(A)
        coder = SparseCoder(dictionary=B.T,
                            transform_alpha=self.rp,
                            transform_algorithm='lasso_cd')

        comps, acts = librosa.decompose.decompose(x, transformer=coder)
        acts = self._pos_constraint(acts)
        return acts

    def predict(self, A, B):
        print(A.shape)
        print(B.shape)

        return B.dot(A)
Example #29
    return feat

def create_dictionary_dl(lmbd, d=2,m=100,n=20, N=10000,case=0,dir_mnist='/home/dujw/darse/save_exp/synthetic'):

    import os.path as osp
    fname = osp.join(dir_mnist, "D_synthetic_d{}_m{},n{},case{},lmbd{}.npy".format(d,m,n,case,lmbd))
    if osp.exists(fname):
        D = np.load(fname)
    else:
        from sklearn.decomposition import DictionaryLearning
        aa = SyntheticProblemGenerator(d=d,m=m,n=n)
        X = aa.get_batch(N)[0]
        K = m
        dl = DictionaryLearning(K, alpha=lmbd*N, fit_algorithm='cd',
                                n_jobs=-1, verbose=1)
        dl.fit(X)
        D = dl.components_.reshape(K, -1)
        np.save(fname, D)
    return D
class synthetic_generate(object):
    def __init__(self,d,N=100000,m=100,n=20,case=0,save_exp="/home/dujw/darse/save_exp/synthetic"):
        self.N = N
        self.case = case
        self.m = m
        self.n = n
        self.d = d
        self.save_exp = save_exp
        self.phi = self.getPhi()
        self.x, self.y = self.getXY()
    def getXY(self):
        N = self.N
class DictionaryLearningMethod(BaseMethod):
    """Implement the dict learning method of the paper using sklearn."""

    def __init__(self, width=24, stride=12, n_components=10, alpha=1,
                 verbose=1, random_state=0, n_jobs=4, max_iter=1):
        self.width = width
        self.stride = stride

        self.n_components = n_components
        self.alpha = alpha
        self.verbose = verbose
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.max_iter = max_iter

        self.estimator = DictionaryLearning(
            n_components=n_components,
            alpha=alpha,
            verbose=verbose,
            random_state=random_state,
            n_jobs=n_jobs,
            max_iter=max_iter,
        )

    @staticmethod
    def window_split(X, s, w):
        """From a signal, create an array of overlapping windows."""
        X = np.array(X).reshape(-1, 1)

        if w > X.shape[0]:
            raise ValueError(f'Window width bigger than signal size ({w}>{X.shape[0]}).')

        n_h = X.shape[0]
        c = int((n_h - w)/s + 1)

        Xs = []
        for k in range(c):
            i = w + k*s
            x = X[i-w:i]
            Xs.append(x)

        return np.concatenate(Xs, axis=1)

    @staticmethod
    def window_merge(X_h, s):
        """From array of overlapping windows, reconstruct the original signal.

        Parameters:
        -----------
            X_h : np.array of shape (w, c)
                Array of overlapping windows.
            s : int
                Stride

        Returns:
        --------
            X : np.array of shape

        """
        w, c = X_h.shape
        W = np.zeros((c, w+s*(c-1)))

        for i in range(c):
            W[i, i*s:i*s+w] = X_h[:, i]

        N = np.sum(W != 0, axis=0)
        x_hat = np.divide(np.sum(W, axis=0), N)
        return x_hat

    def fit(self, X, y=None):
        X_h = self.window_split(X, self.stride, self.width)
        self.estimator.fit(X_h.T)

    def transform_codes(self, X):
        X_h = self.window_split(X, self.stride, self.width)
        X_pred_codes = self.estimator.transform(X_h.T).T
        return X_pred_codes

    def codes_to_signal(self, X_codes):
        D = self.estimator.components_.T
        X_h = D@X_codes
        X = self.window_merge(X_h, self.stride)
        return X

    def transform(self, X):
        X_pred_codes = self.transform_codes(X)
        X_pred = self.codes_to_signal(X_pred_codes)
        return X_pred

    def get_atoms(self):
        return self.estimator.components_.T
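A usage sketch on a synthetic 1-D signal, showing the windowed fit and the signal-level round trip; the signal and sizes are illustrative:

import numpy as np

t = np.linspace(0, 4 * np.pi, 480)
signal = np.sin(t) + 0.1 * np.random.randn(t.size)

method = DictionaryLearningMethod(width=24, stride=12, n_components=10, max_iter=1)
method.fit(signal)
signal_hat = method.transform(signal)  # reconstruction from the sparse codes
print(signal.shape, signal_hat.shape)  # both (480,)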
Example #31
                        n_components=n_components)

D = D_fixed
n_nonzero = 3
alpha = None
algo = 'omp'
color_1 = 'red'
title = algo.upper()

di = DictionaryLearning(n_components=n_components,
                        fit_algorithm='cd',
                        transform_algorithm='lasso_cd',
                        positive_code=True,
                        positive_dict=True)

di.fit(comp_matrix)

d = di.transform(comp_matrix)

coder_1 = SparseCoder(dictionary=d.T,
                      transform_n_nonzero_coefs=n_nonzero,
                      transform_alpha=alpha,
                      transform_algorithm=algo)

comps, acts = librosa.decompose.decompose(comp_matrix, transformer=coder_1)

plt.plot(comp_matrix[0, :],
         color='black',
         lw=2,
         linestyle='--',
         label='Original signal',
Example #32
class SC(object):
    """
    Wrapper for sklearn package.  Performs sparse coding

    Sparse Coding, or Dictionary Learning has 5 methods:
       - fit(waveforms)
       update class instance with Sparse Coding fit

       - fit_transform()
       do what fit() does, but additionally return the projection onto new basis space

       - inverse_transform(A)
       inverts the decomposition, returning waveforms for an input A, using Z^\dagger

       - get_basis()
       returns the basis vectors Z^\dagger

       - get_params()
       returns metadata used for fits.
    """
    def __init__(self,
                 num_components=10,
                 catalog_name='unknown',
                 alpha=0.001,
                 transform_alpha=0.01,
                 max_iter=2000,
                 tol=1e-9,
                 n_jobs=1,
                 verbose=True,
                 random_state=None):

        self._decomposition = 'Sparse Coding'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._alpha = alpha
        self._transform_alpha = transform_alpha
        self._n_jobs = n_jobs
        self._random_state = random_state

        self._DL = DictionaryLearning(n_components=self._num_components,
                                      alpha=self._alpha,
                                      transform_alpha=self._transform_alpha,
                                      n_jobs=self._n_jobs,
                                      verbose=verbose,
                                      random_state=self._random_state)

    def fit(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._DL.fit(self._waveforms)

    def fit_transform(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._DL.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self, A):
        # convert basis back to waveforms using fit
        new_waveforms = self._DL.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._DL.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decomposition'] = self._decomposition
        return params

    def get_basis(self):
        """ Return the SPCA basis vectors (Z^\dagger)"""
        return self._DL.components_
Example #33
Ymean = Ynoisy.mean(axis=0)
Ynoisy = Ynoisy - numpy.tile(Ymean, [Ynoisy.shape[0], 1])

# Select sample patches for training
ch = numpy.random.permutation(Ynoisy.shape[1])[:N]
Y = Ynoisy[:, ch].T
print(Y.shape)

# Training dictionary
from sklearn.decomposition import DictionaryLearning
dico = DictionaryLearning(n,
                          transform_algorithm='omp',
                          alpha=s,
                          random_state=0,
                          verbose=False)
dico.fit(Y)

# Testing the validity of the sparse representation
Xt = dico.transform(Y)
print(Xt.shape)
numpy.testing.assert_array_almost_equal(numpy.dot(Xt, dico.components_),
                                        Y,
                                        decimal=1)

# Generating sparse representation for entire image
Xc = dico.transform(Ynoisy.T)
print(Xc.T.shape)
# D * X
A = numpy.dot(Xc, dico.components_).T

# Inverse centering, image restoration and output
Example #34
	nmfHOG = NMF(n_components=components)
	nmfHOF = NMF(n_components=components)

	nmfHOG.fit(np.array([x['hog'] for x in features]).T)
	nmfHOF.fit(np.array([x['hof'] for x in features]).T)

	hogComponents = nmfHOG.components_.T
	hofComponents = nmfHOF.components_.T

	return hogComponents, hofComponents	

if 0:
	from sklearn.decomposition import DictionaryLearning
	dicHOG = DictionaryLearning(25)
	dicHOG.fit(hogs)


def displayComponents(components):
	
	sides = ceil(np.sqrt(len(components)))
	for i in range(len(components)):
		subplot(sides, sides, i+1)
		imshow(hog2image(components[i], imageSize=[24,24],orientations=4))

	sides = ceil(np.sqrt(components.shape[1]))
	for i in range(components.shape[1]):
		subplot(sides, sides, i+1)
		imshow(hog2image(components[:,i], imageSize=[24,24],orientations=4))

Example #35
def main():

    start = time.time()

    args = sys.argv
    target = args[1]
    sub = args[2]
    threshold = args[3]
    dimention = int(args[4])
    shift = int(args[5])
    sample = int(args[6])

    print('target : {}'.format(target))
    print('subject : {}'.format(sub))

    print('shifted by {} sec'.format(shift))

    # Load brain activity data
    with open(
            '../data/Brain/' + target + '/' + sub + '_train_reduced_' +
            threshold + '.pickle', 'rb') as f:
        brain_data = pickle.load(f)

    # Load semantic representation data
    with open('../data/srm/' + target + '_srm300_train.pickle', 'rb') as f:
        semantic_data = pickle.load(f)

    # Get the semantic representation matrix with the time shift applied
    brain_data, semantic_data = get_time_shift_data(brain_data, semantic_data,
                                                    target, sub, shift)

    print('brain sample : {}'.format(len(brain_data)))
    print('semantic_data : {}'.format(len(semantic_data)))

    # Build a combined matrix by concatenating the two
    brainw2vdata = np.c_[brain_data, semantic_data]
    brainw2vdata = np.array(brainw2vdata)

    brainw2vdata = brainw2vdata[::sample]

    print("次元:")
    print(brainw2vdata.shape)

    # Dictionary learning
    dict_model = DictionaryLearning(n_components=dimention,
                                    alpha=1.0,
                                    transform_algorithm='lasso_lars',
                                    transform_alpha=1.0,
                                    fit_algorithm='lars',
                                    verbose=True)
    dict_model.fit(brainw2vdata)

    # Dictionary
    Dict = dict_model.components_
    print("Dictionary:")
    print(Dict.shape)

    # Coefficients
    coef = dict_model.transform(brainw2vdata)
    print("Coefficients:")
    print(coef.shape)

    # Save the dictionary
    f = open(
        "../data/Dict/" + target + "/Dict_" + sub + "_pred" + threshold +
        "_base" + str(dimention) + "_sec" + str(shift) + "_sample" +
        str(sample) + ".pickle", "wb")
    pickle.dump(Dict, f)
    f.close()

    # Save the coefficients
    f = open(
        "../data/Dict/" + target + "/Coef_" + sub + "_pred" + threshold +
        "_base" + str(dimention) + "_sec" + str(shift) + "_sample" +
        str(sample) + ".pickle", "wb")
    pickle.dump(coef, f)
    f.close()

    # Print elapsed time
    elapsed_time = time.time() - start
    print(("elapsed_time:{0}".format(elapsed_time)) + "[sec]")
Example #36
import numpy as np
import scipy.io
from sklearn.decomposition import DictionaryLearning
from matplotlib import pyplot as plt
import pandas as pd


def decode_image(image):
    decoded_image = np.empty((256, 256))
    for i in range(1024):
        r = i % 32
        c = i // 32
        decoded_image[r * 8:r * 8 + 8,
                      c * 8:c * 8 + 8] = image[i].reshape([8, 8], order="F")
    return decoded_image


if __name__ == "__main__":

    MatPatchedImage = scipy.io.loadmat(
        r"C:\Users\ktmks\Documents\Dic_ler1\ver1_02\mono\PatchData.mat")
    PatchData = np.array(MatPatchedImage["PatchData"]).T

    decoded_image = decode_image(PatchData[:1024, :])

    dico = DictionaryLearning(
        n_components=128,
        transform_n_nonzero_coefs=8,
        verbose=True,
        max_iter=1000,
    )
    Dict = dico.fit(PatchData)  # fit() returns the estimator itself; the learned atoms live in dico.components_
    print("Hello")
Example #37
def test_dict_learning_unknown_fit_algorithm():
    n_components = 5
    dico = DictionaryLearning(n_components, fit_algorithm='<unknown>')
    with pytest.raises(ValueError):
        dico.fit(X)
Example #38
import numpy as np
import scipy.io
from sklearn.decomposition import DictionaryLearning
import pandas as pd
import dill

N_COMPONENTS               =  500
TRANSFORM_N_NONZERO_COEFS  =   10
VERBOSE                    =  True
MAX_ITER                   =   10

MatBrainImage=scipy.io.loadmat(r"C:\Users\ktmks\Documents\research\tmp_results\for_python_data\brain_f_data.mat")

label=MatBrainImage["label"]
Y=MatBrainImage["data"]

dic=DictionaryLearning(n_components              =              N_COMPONENTS,
                       transform_n_nonzero_coefs = TRANSFORM_N_NONZERO_COEFS,
                       verbose                   =                   VERBOSE,
                       max_iter                  =                  MAX_ITER 
                       )
dic.fit(Y)
D=dic.components_
X=dic.transform(Y)
Y_=np.dot(X,D)

filepath = r"C:\Users\ktmks\Documents\research\Python\Brain_DL"+"\\"
filename = "res_"+"AtomN-"   + str(N_COMPONENTS)\
          +"_SparseDegree-"  + str(TRANSFORM_N_NONZERO_COEFS)\
          +"_MaxIter-"       + str(MAX_ITER)
save_filename=filepath+filename+".pkl"

dill.dump_session(save_filename)
scipy.io.savemat(filename+".mat",{"D":D,"X":X,"Y_":Y_,"label":label})
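A small follow-up check one might add after the fit, comparing the reconstruction Y_ with the original data Y (purely illustrative, not part of the original script):

rel_err = np.linalg.norm(Y - Y_) / np.linalg.norm(Y)
print("relative reconstruction error: {:.3f}".format(rel_err))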