def test_dict_learning_lars_code_positivity():
    n_components = 5
    dico = DictionaryLearning(
        n_components, transform_algorithm="lars", random_state=0,
        positive_code=True, fit_algorithm="cd").fit(X)

    err_msg = "Positive constraint not supported for '{}' coding method."
    err_msg = err_msg.format("lars")
    with pytest.raises(ValueError, match=err_msg):
        dico.transform(X)

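# For contrast with the error case above, a minimal sketch (not part of the
# test suite; X_demo is a toy stand-in): positive codes are supported when the
# coding method is 'lasso_cd', 'lasso_lars', or 'threshold'.
import numpy as np
from sklearn.decomposition import DictionaryLearning

rng = np.random.RandomState(0)
X_demo = rng.randn(10, 8)
dico_pos = DictionaryLearning(
    n_components=5, transform_algorithm="lasso_cd", random_state=0,
    positive_code=True, fit_algorithm="cd").fit(X_demo)
assert (dico_pos.transform(X_demo) >= 0).all()
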
def test_dict_learning_split():
    n_atoms = 5
    dico = DictionaryLearning(n_atoms, transform_algorithm='threshold')
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_atoms] - split_code[:, n_atoms:],
                       code)

def test_dict_learning_shapes():
    n_components = 5
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert_equal(dico.components_.shape, (n_components, n_features))

    n_components = 1
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert_equal(dico.components_.shape, (n_components, n_features))
    assert_equal(dico.transform(X).shape, (X.shape[0], n_components))

def test_dict_learning_shapes():
    n_components = 5
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert dico.components_.shape == (n_components, n_features)

    n_components = 1
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert dico.components_.shape == (n_components, n_features)
    assert dico.transform(X).shape == (X.shape[0], n_components)

def test_dict_learning_split():
    n_components = 5
    dico = DictionaryLearning(n_components, transform_algorithm='threshold',
                              random_state=0)
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_almost_equal(split_code[:, :n_components] -
                              split_code[:, n_components:], code)

def test_dict_learning_split():
    n_components = 5
    dico = DictionaryLearning(n_components, transform_algorithm='threshold',
                              random_state=0)
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_components] -
                       split_code[:, n_components:], code)

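# A minimal sketch (illustration only; names are ad hoc) of what split_sign
# checks in the tests above: the first n_components columns hold the positive
# part of each code, the next n_components columns hold the negated negative
# part, so their difference recovers the original code.
import numpy as np

code_demo = np.array([[1.5, -2.0, 0.0]])
split_demo = np.hstack([np.maximum(code_demo, 0), -np.minimum(code_demo, 0)])
assert np.array_equal(split_demo[:, :3] - split_demo[:, 3:], code_demo)
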
def test_dict_learning_reconstruction():
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)

def test_dict_learning_nonzero_coefs():
    n_components = 4
    dico = DictionaryLearning(n_components, transform_algorithm='lars',
                              transform_n_nonzero_coefs=3, random_state=0)
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert_true(len(np.flatnonzero(code)) == 3)

    dico.set_params(transform_algorithm='omp')
    code = dico.transform(X[np.newaxis, 1])
    assert_equal(len(np.flatnonzero(code)), 3)

def test_dict_learning_nonzero_coefs():
    n_components = 4
    dico = DictionaryLearning(n_components, transform_algorithm='lars',
                              transform_n_nonzero_coefs=3, random_state=0)
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert len(np.flatnonzero(code)) == 3

    dico.set_params(transform_algorithm='omp')
    code = dico.transform(X[np.newaxis, 1])
    assert len(np.flatnonzero(code)) == 3

def test_dict_learning_reconstruction_parallel():
    # regression test that parallel reconstruction works with n_jobs=-1
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0,
                              n_jobs=-1)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)

def test_dict_learning_reconstruction_parallel():
    # regression test that parallel reconstruction works with n_jobs > 1
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0, n_jobs=4)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)

from sklearn.decomposition import DictionaryLearning


def dictionary_learn():
    x = [[1, -2, 3, 4, 5.],
         [3, 4, -5, 6, 7],
         [1, 7, 2, -6, 2],
         [3, 8, 6, 2, -8]]
    print(x)
    dct = DictionaryLearning(n_components=5)
    dct.fit(x)
    print(dct.components_)
    print(dct.transform(x))

def test_dict_learning_nonzero_coefs():
    n_atoms = 4
    dico = DictionaryLearning(n_atoms, transform_algorithm='lars',
                              transform_n_nonzero_coefs=3, random_state=0)
    code = dico.fit(X).transform(X[1])
    assert_true(len(np.flatnonzero(code)) == 3)

    dico.set_params(transform_algorithm='omp')
    code = dico.transform(X[1])
    assert_equal(len(np.flatnonzero(code)), 3)

def test_DictionaryLearning(n_components):
    '''
    Test the usage of DictionaryLearning.

    :return: None
    '''
    X = [[1, 2, 3, 4, 5],
         [6, 7, 8, 9, 10],
         [10, 9, 8, 7, 6],
         [5, 4, 3, 2, 1]]
    print("before transform:", X)
    dct = DictionaryLearning(n_components=n_components)
    dct.fit(X)
    print("components is :", dct.components_)
    print("after transform:", dct.transform(X))

def test_DictionaryLearning():
    from sklearn.decomposition import DictionaryLearning
    x = [[1, 2, 3, 4, 5],
         [6, 7, 8, 9, 10],
         [10, 9, 8, 7, 6],
         [5, 4, 3, 2, 1]]
    print("before transform:", x)
    dct = DictionaryLearning(n_components=3)
    dct.fit(x)
    print("components is :", dct.components_)
    print("after transform:", dct.transform(x))

class _DictionaryLearningImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)

def test_dictionary_learning_dtype_match(
    data_type,
    expected_type,
    fit_algorithm,
    transform_algorithm,
):
    # Verify that fit and transform preserve dtype in DictionaryLearning.
    dict_learner = DictionaryLearning(
        n_components=8,
        fit_algorithm=fit_algorithm,
        transform_algorithm=transform_algorithm,
        random_state=0,
    )
    dict_learner.fit(X.astype(data_type))
    assert dict_learner.components_.dtype == expected_type
    assert dict_learner.transform(X.astype(data_type)).dtype == expected_type

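# The dtype test above expects its arguments from pytest parametrization. A
# minimal sketch of decorators that could drive it (the grids below are an
# assumption, not necessarily the upstream ones; floats are preserved, ints
# are upcast to float64):
import numpy as np
import pytest


@pytest.mark.parametrize("fit_algorithm", ["lars", "cd"])
@pytest.mark.parametrize("transform_algorithm",
                         ["lasso_cd", "omp", "threshold"])
@pytest.mark.parametrize(
    "data_type, expected_type",
    [(np.float32, np.float32), (np.float64, np.float64),
     (np.int32, np.float64), (np.int64, np.float64)],
)
def test_dtype_grid_sketch(data_type, expected_type, fit_algorithm,
                           transform_algorithm):
    # Delegates to the test body above; shown only to make the fixtures explicit.
    test_dictionary_learning_dtype_match(data_type, expected_type,
                                         fit_algorithm, transform_algorithm)
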
def test_dict_learning_positivity(transform_algorithm, positive_code,
                                  positive_dict):
    n_components = 5
    dico = DictionaryLearning(
        n_components, transform_algorithm=transform_algorithm, random_state=0,
        positive_code=positive_code, positive_dict=positive_dict).fit(X)
    code = dico.transform(X)
    if positive_dict:
        assert_true((dico.components_ >= 0).all())
    else:
        assert_true((dico.components_ < 0).any())
    if positive_code:
        assert_true((code >= 0).all())
    else:
        assert_true((code < 0).any())

def test_size():
    np.random.seed(0)
    N = 100
    L = 128
    X = np.random.randn(N, 10) + np.random.rand(N, 10)

    dico1 = ApproximateKSVD(n_components=L)
    dico1.fit(X)
    gamma1 = dico1.transform(X)
    e1 = norm(X - gamma1.dot(dico1.components_))

    dico2 = DictionaryLearning(n_components=L)
    dico2.fit(X)
    gamma2 = dico2.transform(X)
    e2 = norm(X - gamma2.dot(dico2.components_))

    assert dico1.components_.shape == dico2.components_.shape
    assert gamma1.shape == gamma2.shape
    assert e1 < e2

class DICL:
    def __init__(self, rfe_cv, *args, **kwargs):
        self.rfe = None
        self.rfe_cv = rfe_cv
        self.model = DictionaryLearning(*args, **kwargs)

    def fit(self, X, y):
        Z = numpy.concatenate([X, y.reshape(-1, 1)], axis=1)
        Z = numpy.array(Z, dtype=numpy.float32)
        Z[Z == numpy.inf] = numpy.nan
        Z[Z == -numpy.inf] = numpy.nan
        nan_mask = ~pandas.isna(Z).any(axis=1)
        X_, y_ = X[nan_mask, :], y[nan_mask]
        if X_.shape[0] != X.shape[0]:
            print('FIT: the sample contains NaNs, they were dropped\t'
                  'N of dropped NaNs: {0}'.format(X.shape[0] - X_.shape[0]))
        if self.rfe_cv:
            raise Exception(
                "DictionaryLearning could not be processed with RFE_CV")
        else:
            self.model.fit(X_)

    def predict(self, X):
        Z = numpy.concatenate([X], axis=1)
        Z = numpy.array(Z, dtype=numpy.float32)
        Z[Z == numpy.inf] = numpy.nan
        Z[Z == -numpy.inf] = numpy.nan
        nan_mask = ~pandas.isna(Z).any(axis=1)
        X_ = X[nan_mask, :]
        if X_.shape[0] != X.shape[0]:
            print('PREDICT: the sample contains NaNs, they were dropped\t'
                  'N of dropped NaNs: {0}'.format(X.shape[0] - X_.shape[0]))
        if self.rfe_cv:
            raise Exception(
                "DictionaryLearning could not be processed with RFE_CV")
        else:
            predicted = self.model.transform(X_)
            Z = numpy.full(shape=(X.shape[0], predicted.shape[1]),
                           fill_value=numpy.nan, dtype=numpy.float64)
            Z[nan_mask, :] = predicted
            return Z

def test_size():
    np.random.seed(0)
    N = 50
    L = 12
    n_features = 16
    D = np.random.randn(L, n_features)
    B = np.array(sp.sparse.random(N, L, density=0.5).todense())
    X = np.dot(B, D)

    dico1 = ApproximateKSVD(n_components=L, transform_n_nonzero_coefs=L)
    dico1.fit(X)
    gamma1 = dico1.transform(X)
    e1 = norm(X - gamma1.dot(dico1.components_))

    dico2 = DictionaryLearning(n_components=L, transform_n_nonzero_coefs=L)
    dico2.fit(X)
    gamma2 = dico2.transform(X)
    e2 = norm(X - gamma2.dot(dico2.components_))

    assert dico1.components_.shape == dico2.components_.shape
    assert gamma1.shape == gamma2.shape
    assert e1 < e2

from sklearn import model_selection
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC


def test_Pipeline(data):
    X_train, X_test, y_train, y_test = data
    steps = [('Linear_SVM', LinearSVC(C=1, penalty='l1', dual=False))]
    pipeline = Pipeline(steps)
    pipeline.fit(X_train, y_train)
    print('name steps : \n', pipeline.named_steps)
    print('Pipeline score : \n', pipeline.score(X_test, y_test))


data = load_digits()
X = data.data
y = data.target
test_Pipeline(model_selection.train_test_split(X, y, test_size=0.25,
                                               stratify=y))

# Dictionary learning
from sklearn.decomposition import DictionaryLearning

X = [[1, 2, 3, 4, 5],
     [6, 7, 8, 9, 10],
     [10, 9, 8, 7, 6],
     [5, 4, 3, 2, 1]]
dct = DictionaryLearning(n_components=3)
dct.fit(X)
dct.transform(X)

class DictionaryLearningMethod(BaseMethod):
    """Implement the dict learning method of the paper using sklearn."""

    def __init__(self, width=24, stride=12, n_components=10, alpha=1,
                 verbose=1, random_state=0, n_jobs=4, max_iter=1):
        self.width = width
        self.stride = stride
        self.n_components = n_components
        self.alpha = alpha
        self.verbose = verbose
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.max_iter = max_iter

        self.estimator = DictionaryLearning(
            n_components=n_components,
            alpha=alpha,
            verbose=verbose,
            random_state=random_state,
            n_jobs=n_jobs,
            max_iter=max_iter,
        )

    @staticmethod
    def window_split(X, s, w):
        """From a signal, create an array of overlapping windows."""
        X = np.array(X).reshape(-1, 1)
        if w > X.shape[0]:
            raise ValueError(
                f'Window width bigger than signal size ({w}>{X.shape[0]}).')
        n_h = X.shape[0]
        c = int((n_h - w)/s + 1)
        Xs = []
        for k in range(c):
            i = w + k*s
            x = X[i-w:i]
            Xs.append(x)
        return np.concatenate(Xs, axis=1)

    @staticmethod
    def window_merge(X_h, s):
        """From an array of overlapping windows, reconstruct the signal.

        Parameters
        ----------
        X_h : np.array of shape (w, c)
            Array of overlapping windows.
        s : int
            Stride.

        Returns
        -------
        X : np.array of shape (w + s*(c-1),)
            The reconstructed signal, averaging overlapping samples.
        """
        w, c = X_h.shape
        W = np.zeros((c, w+s*(c-1)))
        for i in range(c):
            W[i, i*s:i*s+w] = X_h[:, i]
        N = np.sum(W != 0, axis=0)
        x_hat = np.divide(np.sum(W, axis=0), N)
        return x_hat

    def fit(self, X, y=None):
        X_h = self.window_split(X, self.stride, self.width)
        self.estimator.fit(X_h.T)

    def transform_codes(self, X):
        X_h = self.window_split(X, self.stride, self.width)
        X_pred_codes = self.estimator.transform(X_h.T).T
        return X_pred_codes

    def codes_to_signal(self, X_codes):
        D = self.estimator.components_.T
        X_h = D @ X_codes
        X = self.window_merge(X_h, self.stride)
        return X

    def transform(self, X):
        X_pred_codes = self.transform_codes(X)
        X_pred = self.codes_to_signal(X_pred_codes)
        return X_pred

    def get_atoms(self):
        return self.estimator.components_.T

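# A quick round-trip sketch of the windowing helpers above (toy values, not
# from the original project; nonzero samples are used because window_merge
# treats zeros as uncovered positions):
import numpy as np

signal = np.arange(1., 11.)                                # length 10
w, s = 4, 2                                                # width 4, stride 2
X_h = DictionaryLearningMethod.window_split(signal, s, w)  # shape (4, 4)
recovered = DictionaryLearningMethod.window_merge(X_h, s)  # length w+s*(c-1) = 10
assert np.allclose(recovered, signal)
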
class SparseCoding:

    DEFAULT_MODEL_PARAMS = {
        'n_components': 10,
        'n_features': 64,
        'max_iter': 5,
        'random_state': 1,
        'dict_init': None,
        'code_init': None,
    }

    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS['n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS['random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

            # initialize Dictionary Learning object with default params and weights
            self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                             alpha=1,
                                             max_iter=self.max_iter,
                                             tol=1e-08,
                                             fit_algorithm='lars',
                                             transform_algorithm='omp',
                                             transform_n_nonzero_coefs=None,
                                             transform_alpha=None,
                                             n_jobs=1,
                                             code_init=self.code_init,
                                             dict_init=self.dict_init,
                                             verbose=False,
                                             split_sign=False,
                                             random_state=self.random_state)

    def save_model(self, filename):
        # save DL object to file, compress is also to prevent multiple model files.
        joblib.dump(self.DL_obj, filename, compress=3)

    def load_model(self, filename):
        # load DL Object from file
        self.DL_obj = joblib.load(filename)
        # set certain model params as class attributes. Get values from
        # DL_obj.get_params() or use default values.
        DL_params = self.DL_obj.get_params()
        for param in SparseCoding.DEFAULT_MODEL_PARAMS:
            if param in DL_params:
                setattr(self, param, DL_params[param])
            else:
                setattr(self, param, SparseCoding.DEFAULT_MODEL_PARAMS[param])

    def learn_dictionary(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape(
                (whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, \
            "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim
        # learn dictionary
        self.DL_obj.fit(whitened_patches)

    def get_dictionary(self):
        try:
            return self.DL_obj.components_
        except AttributeError:
            raise AttributeError("Feature extraction dictionary has not yet been learnt for this model. "
                                 + "Train the feature extraction model at least once to prevent this error.")

    def get_sparse_features(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape(
                (whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, \
            "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim
        try:
            sparse_code = self.DL_obj.transform(whitened_patches)
        except NotFittedError:
            raise NotFittedError("Feature extraction dictionary has not yet been learnt for this model, "
                                 + "therefore Sparse Codes cannot be extracted. Train the feature extraction model "
                                 + "at least once to prevent this error.")
        return sparse_code

    def get_sign_split_features(self, sparse_features):
        n_samples, n_components = sparse_features.shape
        sign_split_features = np.empty((n_samples, 2 * n_components))
        sign_split_features[:, :n_components] = np.maximum(sparse_features, 0)
        sign_split_features[:, n_components:] = -np.minimum(sparse_features, 0)
        return sign_split_features

    def get_pooled_features(self, input_feature_map, filter_size=(19, 19)):
        # assuming square filters and images
        filter_side = filter_size[0]
        # reshaping incoming features from 2d to 3d, i.e. (3249,20) to (57,57,20)
        input_feature_map_shape = input_feature_map.shape
        if input_feature_map.ndim == 2:
            input_feature_map_side = int(np.sqrt(input_feature_map.shape[0]))
            input_feature_map = input_feature_map.reshape(
                (input_feature_map_side, input_feature_map_side,
                 input_feature_map_shape[-1]))
        assert input_feature_map.ndim == 3, \
            "Input features dimension is %d instead of 3" % input_feature_map.ndim

        # get windows (57,57,20) to (3,3,1,19,19,20)
        input_feature_map_windows = view_as_windows(
            input_feature_map,
            window_shape=(filter_size[0], filter_size[1],
                          input_feature_map.shape[-1]),
            step=filter_size[0])

        # reshape windows (3,3,1,19,19,20) to (3**2, 19**2, 20) == (9, 361, 20)
        input_feature_map_windows = input_feature_map_windows.reshape(
            (input_feature_map_windows.shape[0]**2, filter_size[0]**2,
             input_feature_map.shape[-1]))

        # calculate norms (9, 361, 20) to (9, 361)
        input_feature_map_window_norms = np.linalg.norm(
            input_feature_map_windows, ord=2, axis=-1)

        # calculate indexes of max norms per window (9, 361) to (9,).
        # One max index per window.
        max_norm_indexes = np.argmax(input_feature_map_window_norms, axis=-1)

        # max pooled features are the features at the max-norm indexes,
        # (9, 361, 20) to (9, 20). One pooled feature vector per window.
        pooled_features = input_feature_map_windows[
            np.arange(input_feature_map_windows.shape[0]), max_norm_indexes]

        # return pooled feature map
        return pooled_features

    # Combined Pipeline
    def get_pooled_features_from_whitened_patches(self, whitened_patches):
        sparse_features = self.get_sparse_features(whitened_patches)
        sign_split_features = self.get_sign_split_features(sparse_features)
        pooled_features = self.get_pooled_features(sign_split_features)
        return pooled_features

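# A shape walk-through of the pooling stages above (toy sizes matching the
# comments; random codes stand in for real sparse features, and the class's
# own module imports of np and view_as_windows are assumed):
import numpy as np

sc = SparseCoding()
codes = np.random.randn(3249, 10)          # 3249 = 57**2 patches, 10 atoms
split = sc.get_sign_split_features(codes)  # (3249, 20)
pooled = sc.get_pooled_features(split)     # (9, 20) with 19x19 max-norm pooling
assert split.shape == (3249, 20) and pooled.shape == (9, 20)
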
def main():
    start = time.time()
    args = sys.argv
    target = args[1]
    sub = args[2]
    threshold = args[3]
    dimention = int(args[4])
    shift = int(args[5])
    sample = int(args[6])

    print('target : {}'.format(target))
    print('subject : {}'.format(sub))
    print('{} sec shift'.format(shift))

    # Load brain activity data
    with open('../data/Brain/' + target + '/' + sub + '_train_reduced_' +
              threshold + '.pickle', 'rb') as f:
        brain_data = pickle.load(f)

    # Load semantic representation data
    with open('../data/srm/' + target + '_srm300_train.pickle', 'rb') as f:
        semantic_data = pickle.load(f)

    # Get the semantic representation matrix with the time shift applied
    brain_data, semantic_data = get_time_shift_data(brain_data, semantic_data,
                                                    target, sub, shift)
    print('brain sample : {}'.format(len(brain_data)))
    print('semantic_data : {}'.format(len(semantic_data)))

    # Build the combined matrix concatenating the two
    brainw2vdata = np.c_[brain_data, semantic_data]
    brainw2vdata = np.array(brainw2vdata)
    brainw2vdata = brainw2vdata[::sample]
    print("dimensions:")
    print(brainw2vdata.shape)

    # Dictionary learning
    dict_model = DictionaryLearning(n_components=dimention, alpha=1.0,
                                    transform_algorithm='lasso_lars',
                                    transform_alpha=1.0,
                                    fit_algorithm='lars', verbose=True)
    dict_model.fit(brainw2vdata)

    # Dictionary
    Dict = dict_model.components_
    print("dictionary:")
    print(Dict.shape)

    # Coefficients
    coef = dict_model.transform(brainw2vdata)
    print("coefficients:")
    print(coef.shape)

    # Save the dictionary
    f = open("../data/Dict/" + target + "/Dict_" + sub + "_pred" + threshold +
             "_base" + str(dimention) + "_sec" + str(shift) + "_sample" +
             str(sample) + ".pickle", "wb")
    pickle.dump(Dict, f)
    f.close()

    # Save the coefficients
    f = open("../data/Dict/" + target + "/Coef_" + sub + "_pred" + threshold +
             "_base" + str(dimention) + "_sec" + str(shift) + "_sample" +
             str(sample) + ".pickle", "wb")
    pickle.dump(coef, f)
    f.close()

    # Print elapsed time
    elapsed_time = time.time() - start
    print(("elapsed_time:{0}".format(elapsed_time)) + "[sec]")

# Decomposition to classify with DictionaryLearning
from sklearn.decomposition import DictionaryLearning

dl = DictionaryLearning(3)
transformed = dl.fit_transform(iris_data[::2])
transformed[:5]
# array([[ 0.        ,  6.34476574,  0.        ],
#        [ 0.        ,  5.83576461,  0.        ],
#        [ 0.        ,  6.32038375,  0.        ],
#        [ 0.        ,  5.89318572,  0.        ],
#        [ 0.        ,  5.45222715,  0.        ]])

# Next, let's transform (not fit_transform) the testing set:
transformed = dl.transform(iris_data[1::2])

# Putting it all together with Pipelines
# Let's briefly load the iris dataset and seed it with some missing values:
from sklearn.datasets import load_iris
import numpy as np

iris = load_iris()
iris_data = iris.data
mask = np.random.binomial(1, .25, iris_data.shape).astype(bool)
iris_data[mask] = np.nan
iris_data[:5]
# array([[ 5.1,  3.5,  1.4,  nan],
#        [ nan,  3. ,  1.4,  0.2],
#        [ 4.7,  3.2,  1.3,  0.2],

# Select sample patches for training
ch = numpy.random.permutation(Ynoisy.shape[1])[:N]
Y = Ynoisy[:, ch].T
print(Y.shape)

# Training dictionary
from sklearn.decomposition import DictionaryLearning

dico = DictionaryLearning(n, transform_algorithm='omp', alpha=s,
                          random_state=0, verbose=False)
dico.fit(Y)

# Testing the validity of the sparse representation
Xt = dico.transform(Y)
print(Xt.shape)
numpy.testing.assert_array_almost_equal(numpy.dot(Xt, dico.components_), Y,
                                        decimal=1)

# Generating sparse representation for entire image
Xc = dico.transform(Ynoisy.T)
print(Xc.T.shape)

# D * X
A = numpy.dot(Xc, dico.components_).T

# Inverse centering, image restoration and output
A = A + numpy.tile(Ymean, [Ynoisy.shape[0], 1])
Ic = col2im(A, (I.shape[0], I.shape[1]), (p, p))
show_bytes(Ic, "s_denoise_sliding_Ic.png")

D = D_fixed
n_nonzero = 3
alpha = None
algo = 'omp'
color_1 = 'red'
title = algo.upper()

di = DictionaryLearning(n_components=n_components, fit_algorithm='cd',
                        transform_algorithm='lasso_cd', positive_code=True,
                        positive_dict=True)
di.fit(comp_matrix)
d = di.transform(comp_matrix)

coder_1 = SparseCoder(dictionary=d.T, transform_n_nonzero_coefs=n_nonzero,
                      transform_alpha=alpha, transform_algorithm=algo)
comps, acts = librosa.decompose.decompose(comp_matrix, transformer=coder_1)

plt.plot(comp_matrix[0, :], color='black', lw=2, linestyle='--',
         label='Original signal', alpha=0.5)
plt.plot(acts[0, :],

else:
    with open(file_name, 'rb') as input:
        d = pickle.load(input)
        print("loaded dictionary")

sparse_dict = np.transpose(d.components_)

print("analyse pursuit")
num_images_to_pursuit = 10
mean_support = 0
average_element_size = 0
all_support_coeffs = np.array([])
for i in range(num_images_to_pursuit):
    idx = randint(0, x_train.shape[0])
    sparse_vec = d.transform(x_train[idx:idx+1, :])
    all_support_coeffs = np.append(all_support_coeffs,
                                   sparse_vec[sparse_vec != 0])
    mean_support += np.count_nonzero(sparse_vec)
    average_element_size += np.average(np.abs(sparse_vec[sparse_vec != 0]))

print("mean support is " + str(mean_support/num_images_to_pursuit))
print("average_atom_coeff is " +
      str(average_element_size / num_images_to_pursuit))
# plt.hist(all_support_coeffs, bins=100)
# plt.show()

thrs = [0, 0.01, 0.1, 0.5, 1, 2]
figs, axs = plt.subplots(num_images_to_pursuit, len(thrs)+1)
axs[0][0].set_title('bla')
for k in range(1, len(thrs)+1):
    axs[0][k].set_title("thr " + str(thrs[k-1]))
for i in range(num_images_to_pursuit):

class SparseCoding(object):

    def __init__(self, n, transform_algorithm='lars'):
        self.n = n
        self.net = DictionaryLearning(n_components=n, alpha=0.8,
                                      max_iter=1000)
        self.net.set_params(transform_algorithm=transform_algorithm)

    def plot_B(self, B):
        plt.figure(figsize=(4.2, 4))
        for i, comp in enumerate(B[:self.n]):
            plt.subplot(10, 10, i + 1)
            plt.imshow(comp, cmap=plt.cm.gray_r, interpolation='nearest')
            plt.xticks(())
            plt.yticks(())
        plt.suptitle('Dictionary learned from time series\n' +
                     'Train time %.1fs on %d patches' % (dt, len(data)),
                     fontsize=16)
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def _init(self):
        a = np.random.random((self.n, self.m))
        b = np.random.random((self.T, self.n))
        b /= sum(b)
        return a, b

    def init_weights(self, X_mat):
        B, A, recon = [], [], []
        for app in X_mat:
            data = X_mat[app].reshape(1, -1)
            B_i = self.net.fit(data).components_
            A_i = self.net.transform(data)
            X_hat = np.dot(A_i, B_i)
            B.append(B_i)
            A.append(A_i)
            recon.append(X_hat)
            print("MSE Error: ", np.mean((data - X_hat)**2))
        return A, B, recon

    def DiscriminativeDisaggregation(self, appliances, B, A):
        x = np.array([appliances[app] for app in appliances])
        x = x.T
        A_star = np.vstack(A)
        B_cat = np.hstack(B)
        change = 1
        t = 0
        print(A_star.shape)
        print(B_cat.shape)
        while t <= self.steps and self.epsilon <= change:
            B_cat_p = B_cat
            acts = self.F(x, B_cat, A=A_star)
            B_cat = (B_cat - self.alpha *
                     ((x - B_cat.dot(acts)).dot(acts.T) -
                      (x - B_cat.dot(A_star)).dot(A_star.T)))
            B_cat = self._pos_constraint(B_cat)
            B_cat /= sum(B_cat)
            t += 1
            change = np.linalg.norm(B_cat - B_cat_p)
            print("Change is {} and step is {} ".format(change, t))
        return B_cat

    def F(self, x, B, x_train=None, A=None, rp_tep=False, rp_gl=False):
        B = np.asarray(B)
        A = np.asarray(A)
        coder = SparseCoder(dictionary=B.T, transform_alpha=self.rp,
                            transform_algorithm='lasso_cd')
        comps, acts = librosa.decompose.decompose(x, transformer=coder)
        acts = self._pos_constraint(acts)
        return acts

    def predict(self, A, B):
        print(A.shape)
        print(B.shape)
        return B.dot(A)

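# A minimal sketch of the SparseCoder pattern used in F above (toy matrices,
# not from the original project; SparseCoder expects atoms as rows, hence the
# transpose):
import numpy as np
from sklearn.decomposition import SparseCoder

rng = np.random.RandomState(0)
B = rng.rand(6, 4)                    # 6-dim signals, 4 atoms as columns
x = rng.rand(6, 5)                    # 5 signals as columns
coder = SparseCoder(dictionary=B.T, transform_alpha=0.1,
                    transform_algorithm='lasso_cd')
acts = coder.transform(x.T).T         # activations, shape (4, 5)
print(np.linalg.norm(x - B @ acts))   # reconstruction residual
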
import dill
import numpy as np
import pandas as pd
import scipy.io
from sklearn.decomposition import DictionaryLearning

N_COMPONENTS = 500
TRANSFORM_N_NONZERO_COEFS = 10
VERBOSE = True
MAX_ITER = 10

MatBrainImage = scipy.io.loadmat(
    r"C:\Users\ktmks\Documents\research\tmp_results\for_python_data\brain_f_data.mat")
label = MatBrainImage["label"]
Y = MatBrainImage["data"]

dic = DictionaryLearning(n_components=N_COMPONENTS,
                         transform_n_nonzero_coefs=TRANSFORM_N_NONZERO_COEFS,
                         verbose=VERBOSE,
                         max_iter=MAX_ITER)
dic.fit(Y)

D = dic.components_
X = dic.transform(Y)
Y_ = np.dot(X, D)

filepath = r"C:\Users\ktmks\Documents\research\Python\Brain_DL" + "\\"
filename = ("res_" + "AtomN-" + str(N_COMPONENTS)
            + "_SparseDegree-" + str(TRANSFORM_N_NONZERO_COEFS)
            + "_MaxIter-" + str(MAX_ITER))
save_filename = filepath + filename + ".pkl"
dill.dump_session(save_filename)

scipy.io.savemat(filename + ".mat", {"D": D, "X": X, "Y_": Y_, "label": label})