Example #1
def test_dict_learning_lars_code_positivity():
    n_components = 5
    dico = DictionaryLearning(
        n_components, transform_algorithm="lars", random_state=0,
        positive_code=True, fit_algorithm="cd").fit(X)

    err_msg = "Positive constraint not supported for '{}' coding method."
    err_msg = err_msg.format("lars")
    with pytest.raises(ValueError, match=err_msg):
        dico.transform(X)
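Most of the scikit-learn test snippets in this listing reference a module-level data matrix X (and n_features) defined elsewhere in the test suite. Below is a minimal fixture sketch, assuming random data, so the snippets can be run standalone; older snippets additionally use legacy helpers such as assert_true and assert_equal from earlier scikit-learn test utilities.

# Assumed fixture: the real test module builds X differently, but any small
# dense matrix is enough to exercise the snippets below.
import numpy as np
import pytest
from numpy.testing import assert_array_equal, assert_array_almost_equal
from sklearn.decomposition import DictionaryLearning

rng = np.random.RandomState(0)
n_samples, n_features = 10, 8
X = rng.randn(n_samples, n_features)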
Example #2
def test_dict_learning_split():
    n_atoms = 5
    dico = DictionaryLearning(n_atoms, transform_algorithm='threshold')
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_atoms] - split_code[:, n_atoms:], code)
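split_sign=True stacks the positive and negative parts of each code side by side, which is why the test recovers the original code as the difference of the two halves. A NumPy-only illustration of that identity:

import numpy as np

code = np.array([[1.5, -2.0, 0.0]])
split_code = np.hstack([np.maximum(code, 0), np.maximum(-code, 0)])
assert np.array_equal(split_code[:, :3] - split_code[:, 3:], code)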
def test_dict_learning_shapes():
    n_components = 5
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert_equal(dico.components_.shape, (n_components, n_features))

    n_components = 1
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert_equal(dico.components_.shape, (n_components, n_features))
    assert_equal(dico.transform(X).shape, (X.shape[0], n_components))
Example #5
def test_dict_learning_shapes():
    n_components = 5
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert dico.components_.shape == (n_components, n_features)

    n_components = 1
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert dico.components_.shape == (n_components, n_features)
    assert dico.transform(X).shape == (X.shape[0], n_components)
Example #6
def test_dict_learning_split():
    n_components = 5
    dico = DictionaryLearning(n_components, transform_algorithm='threshold',
                              random_state=0)
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_almost_equal(split_code[:, :n_components] -
                              split_code[:, n_components:], code)
def test_dict_learning_split():
    n_components = 5
    dico = DictionaryLearning(n_components, transform_algorithm='threshold',
                              random_state=0)
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_components] -
                       split_code[:, n_components:], code)
Example #8
def test_dict_learning_reconstruction():
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
def test_dict_learning_nonzero_coefs():
    n_components = 4
    dico = DictionaryLearning(n_components, transform_algorithm='lars',
                              transform_n_nonzero_coefs=3, random_state=0)
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert_true(len(np.flatnonzero(code)) == 3)

    dico.set_params(transform_algorithm='omp')
    code = dico.transform(X[np.newaxis, 1])
    assert_equal(len(np.flatnonzero(code)), 3)
Example #11
def test_dict_learning_nonzero_coefs():
    n_components = 4
    dico = DictionaryLearning(n_components, transform_algorithm='lars',
                              transform_n_nonzero_coefs=3, random_state=0)
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert len(np.flatnonzero(code)) == 3

    dico.set_params(transform_algorithm='omp')
    code = dico.transform(X[np.newaxis, 1])
    assert len(np.flatnonzero(code)) == 3
def test_dict_learning_reconstruction_parallel():
    # regression test that parallel reconstruction works with n_jobs=-1
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0, n_jobs=-1)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
Example #13
def test_dict_learning_reconstruction_parallel():
    # regression test that parallel reconstruction works with n_jobs>1
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0, n_jobs=4)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
Example #14
def dictionary_learn():
    x = [[1, -2, 3, 4, 5.], [3, 4, -5, 6, 7], [1, 7, 2, -6, 2],
         [3, 8, 6, 2, -8]]
    print(x)

    dct = DictionaryLearning(n_components=5)
    dct.fit(x)

    print(dct.components_)
    print(dct.transform(x))

    pass
Example #15
def test_dict_learning_nonzero_coefs():
    n_atoms = 4
    dico = DictionaryLearning(n_atoms,
                              transform_algorithm='lars',
                              transform_n_nonzero_coefs=3,
                              random_state=0)
    code = dico.fit(X).transform(X[1])
    assert_true(len(np.flatnonzero(code)) == 3)

    dico.set_params(transform_algorithm='omp')
    code = dico.transform(X[1])
    assert_equal(len(np.flatnonzero(code)), 3)
Example #16
def test_DictionaryLearning(n_components):
    '''
    Test the usage of DictionaryLearning

    :return: None
    '''
    X = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [10, 9, 8, 7, 6], [5, 4, 3, 2, 1]]
    print("before transform:", X)
    dct = DictionaryLearning(n_components=n_components)
    dct.fit(X)
    print("components is :", dct.components_)
    print("after transform:", dct.transform(X))
Example #17
def test_DictionaryLearning():
    from sklearn.decomposition import DictionaryLearning
    x = [
        [1, 2, 3, 4, 5],
        [6, 7, 8, 9, 10],
        [10, 9, 8, 7, 6],
        [5, 4, 3, 2, 1]
    ]
    print("before transform:", x)
    dct = DictionaryLearning(n_components=3)
    dct.fit(x)
    print("components is :", dct.components_)
    print("after transform:", dct.transform(x))
Example #18
class _DictionaryLearningImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
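Op is not defined in the snippet above; in the original project it presumably names the wrapped scikit-learn estimator. A usage sketch under that assumption:

# Assumption: Op is the wrapped scikit-learn estimator.
from sklearn.decomposition import DictionaryLearning as Op

impl = _DictionaryLearningImpl(n_components=3, random_state=0)
codes = impl.fit(X).transform(X)   # X as in the fixture sketch near the top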
def test_dictionary_learning_dtype_match(
    data_type,
    expected_type,
    fit_algorithm,
    transform_algorithm,
):
    # Verify preserving dtype for fit and transform in dictionary learning class
    dict_learner = DictionaryLearning(
        n_components=8,
        fit_algorithm=fit_algorithm,
        transform_algorithm=transform_algorithm,
        random_state=0,
    )
    dict_learner.fit(X.astype(data_type))
    assert dict_learner.components_.dtype == expected_type
    assert dict_learner.transform(X.astype(data_type)).dtype == expected_type
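The arguments of the test above come from pytest parametrization; a hedged sketch of decorators that could drive it (the real test module may use different grids):

# Hypothetical parametrization grids, for illustration only.
import numpy as np
import pytest

@pytest.mark.parametrize("data_type, expected_type",
                         [(np.float32, np.float32), (np.float64, np.float64)])
@pytest.mark.parametrize("fit_algorithm", ["lars", "cd"])
@pytest.mark.parametrize("transform_algorithm", ["lasso_lars", "lasso_cd", "omp"])
def test_dictionary_learning_dtype_match(data_type, expected_type,
                                         fit_algorithm, transform_algorithm):
    ...  # body as in the example above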
Example #20
def test_dict_learning_positivity(transform_algorithm,
                                  positive_code,
                                  positive_dict):
    n_components = 5
    dico = DictionaryLearning(
        n_components, transform_algorithm=transform_algorithm, random_state=0,
        positive_code=positive_code, positive_dict=positive_dict).fit(X)
    code = dico.transform(X)
    if positive_dict:
        assert_true((dico.components_ >= 0).all())
    else:
        assert_true((dico.components_ < 0).any())
    if positive_code:
        assert_true((code >= 0).all())
    else:
        assert_true((code < 0).any())
Example #21
def test_size():
    np.random.seed(0)
    N = 100
    L = 128
    X = np.random.randn(N, 10) + np.random.rand(N, 10)
    dico1 = ApproximateKSVD(n_components=L)
    dico1.fit(X)
    gamma1 = dico1.transform(X)
    e1 = norm(X - gamma1.dot(dico1.components_))

    dico2 = DictionaryLearning(n_components=L)
    dico2.fit(X)
    gamma2 = dico2.transform(X)
    e2 = norm(X - gamma2.dot(dico2.components_))

    assert dico1.components_.shape == dico2.components_.shape
    assert gamma1.shape == gamma2.shape
    assert e1 < e2
Example #22
class DICL:
    def __init__(self, rfe_cv, *args, **kwargs):
        self.rfe = None
        self.rfe_cv = rfe_cv
        self.model = DictionaryLearning(*args, **kwargs)

    def fit(self, X, y):
        Z = numpy.concatenate([X, y.reshape(-1, 1)], axis=1)
        Z = numpy.array(Z, dtype=numpy.float32)
        Z[Z == numpy.inf] = numpy.nan
        Z[Z == -numpy.inf] = numpy.nan
        X_, y_ = X[~pandas.isna(Z).any(axis=1), :], y[~pandas.isna(Z).any(
            axis=1)]
        if Z.shape[0] != X.shape[0]:
            print(
                'FIT: the sample contains NaNs, they were dropped\tN of dropped NaNs: {0}'
                .format(X.shape[0] - X_.shape[0]))
        if self.rfe_cv:
            raise Exception("PCA could not be processed with RFE_CV")
        else:
            self.model.fit(X_)

    def predict(self, X):
        Z = numpy.concatenate([X], axis=1)
        Z = numpy.array(Z, dtype=numpy.float32)
        Z[Z == numpy.inf] = numpy.nan
        Z[Z == -numpy.inf] = numpy.nan
        nan_mask = ~pandas.isna(Z).any(axis=1)
        X_ = X[nan_mask, :]
        if Z.shape[0] != X.shape[0]:
            print(
                'PREDICT: the sample contains NaNs, they were dropped\tN of dropped NaNs: {0}'
                .format(X.shape[0] - X_.shape[0]))
        if self.rfe_cv:
            raise Exception("PCA could not be processed with RFE_CV")
        else:
            predicted = self.model.transform(X_)
            Z = numpy.full(shape=(X.shape[0], predicted.shape[1]),
                           fill_value=numpy.nan,
                           dtype=numpy.float64)
            Z[nan_mask, :] = predicted
        return Z
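A usage sketch for the DICL wrapper above (assumes numpy and pandas are imported as the class requires): rows containing NaN are dropped for fitting and come back as all-NaN code rows from predict.

import numpy
import pandas  # required by the DICL methods above

X_demo = numpy.random.RandomState(0).randn(20, 6)
X_demo[3, 2] = numpy.nan                       # this row is dropped during fit
y_demo = numpy.random.RandomState(1).randn(20)

dicl = DICL(rfe_cv=False, n_components=3, random_state=0)
dicl.fit(X_demo, y_demo)
codes = dicl.predict(X_demo)
print(codes.shape)                             # (20, 3); row 3 is all NaN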
Example #23
def test_size():
    np.random.seed(0)
    N = 50
    L = 12
    n_features = 16
    D = np.random.randn(L, n_features)
    B = np.array(sp.sparse.random(N, L, density=0.5).todense())
    X = np.dot(B, D)
    dico1 = ApproximateKSVD(n_components=L, transform_n_nonzero_coefs=L)
    dico1.fit(X)
    gamma1 = dico1.transform(X)
    e1 = norm(X - gamma1.dot(dico1.components_))

    dico2 = DictionaryLearning(n_components=L, transform_n_nonzero_coefs=L)
    dico2.fit(X)
    gamma2 = dico2.transform(X)
    e2 = norm(X - gamma2.dot(dico2.components_))

    assert dico1.components_.shape == dico2.components_.shape
    assert gamma1.shape == gamma2.shape
    assert e1 < e2
Example #24
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits
from sklearn import model_selection
from sklearn.pipeline import Pipeline

def test_Pipeline(data):
    X_train,X_test,y_train,y_test = data
    steps = [('Linear_SVM',LinearSVC(C=1,penalty='l1',dual=False))]
    pipeline = Pipeline(steps)
    pipeline.fit(X_train,y_train)
    print('name steps : \n',pipeline.named_steps)
    print('Pipeline score : \n',pipeline.score(X_test,y_test))

data = load_digits()
X = data.data
y = data.target
test_Pipeline(model_selection.train_test_split(X,y,test_size=0.25,stratify=y))

# Dictionary learning
from sklearn.decomposition import DictionaryLearning
X= [[1,2,3,4,5],
    [6,7,8,9,10],
    [10,9,8,7,6],
    [5,4,3,2,1]]
dct = DictionaryLearning(n_components=3)
dct.fit(X)
dct.transform(X)





class DictionaryLearningMethod(BaseMethod):
    """Implement the dict learning method of the paper using sklearn."""

    def __init__(self, width=24, stride=12, n_components=10, alpha=1,
                 verbose=1, random_state=0, n_jobs=4, max_iter=1):
        self.width = width
        self.stride = stride

        self.n_components = n_components
        self.alpha = alpha
        self.verbose = verbose
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.max_iter = max_iter

        self.estimator = DictionaryLearning(
            n_components=n_components,
            alpha=alpha,
            verbose=verbose,
            random_state=random_state,
            n_jobs=n_jobs,
            max_iter=max_iter,
        )

    @staticmethod
    def window_split(X, s, w):
        """From a signal, create an array of overlapping windows."""
        X = np.array(X).reshape(-1, 1)

        if w > X.shape[0]:
            raise ValueError(f'Window width bigger than signal size ({w}>{X.shape[0]}).')

        n_h = X.shape[0]
        c = int((n_h - w)/s + 1)

        Xs = []
        for k in range(c):
            i = w + k*s
            x = X[i-w:i]
            Xs.append(x)

        return np.concatenate(Xs, axis=1)

    @staticmethod
    def window_merge(X_h, s):
        """From array of overlapping windows, reconstruct the original signal.

        Parameters:
        -----------
            X_h : np.array of shape (w, c)
                Array of overlapping windows.
            s : int
                Stride

        Returns:
        --------
            X : np.array of shape

        """
        w, c = X_h.shape
        W = np.zeros((c, w+s*(c-1)))

        for i in range(c):
            W[i, i*s:i*s+w] = X_h[:, i]

        N = np.sum(W != 0, axis=0)
        x_hat = np.divide(np.sum(W, axis=0), N)
        return x_hat

    def fit(self, X, y=None):
        X_h = self.window_split(X, self.stride, self.width)
        self.estimator.fit(X_h.T)

    def transform_codes(self, X):
        X_h = self.window_split(X, self.stride, self.width)
        X_pred_codes = self.estimator.transform(X_h.T).T
        return X_pred_codes

    def codes_to_signal(self, X_codes):
        D = self.estimator.components_.T
        X_h = D@X_codes
        X = self.window_merge(X_h, self.stride)
        return X

    def transform(self, X):
        X_pred_codes = self.transform_codes(X)
        X_pred = self.codes_to_signal(X_pred_codes)
        return X_pred

    def get_atoms(self):
        return self.estimator.components_.T
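A usage sketch for DictionaryLearningMethod, assuming the BaseMethod base class and the imports used above (numpy as np, DictionaryLearning) are available; the toy signal length is chosen so the windowing works out evenly.

import numpy as np

signal = np.sin(np.linspace(0, 20 * np.pi, 480))            # toy 1-D signal
method = DictionaryLearningMethod(width=24, stride=12, n_components=5,
                                  verbose=0, n_jobs=1, max_iter=1)
method.fit(signal)
reconstruction = method.transform(signal)                    # codes -> signal
print(reconstruction.shape, method.get_atoms().shape)        # (480,), (24, 5)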
Example #26
class SparseCoding:

    DEFAULT_MODEL_PARAMS = {
        'n_components' : 10,
        'n_features' : 64,
        'max_iter' : 5,
        'random_state' : 1,
        'dict_init' : None,
        'code_init' : None
    }

    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS['n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS['random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

            # initialize Dictionary Learning object with default params and weights
            self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                       alpha=1,
                                       max_iter=self.max_iter,
                                       tol=1e-08,
                                       fit_algorithm='lars',
                                       transform_algorithm='omp',
                                       transform_n_nonzero_coefs=None,
                                       transform_alpha=None,
                                       n_jobs=1,
                                       code_init=self.code_init,
                                       dict_init=self.dict_init,
                                       verbose=False,
                                       split_sign=False,
                                       random_state=self.random_state)


    def save_model(self, filename):
        # save DL object to file, compress is also to prevent multiple model files.
        joblib.dump(self.DL_obj, filename, compress=3)


    def load_model(self, filename):
        # load DL Object from file
        self.DL_obj = joblib.load(filename)

        # set certain model params as class attributes. Get values from DL Obj.get_params() or use default values.
        DL_params = self.DL_obj.get_params()
        for param in SparseCoding.DEFAULT_MODEL_PARAMS:
            if param in DL_params:
                setattr(self, param, DL_params[param])
            else:
                setattr(self, param, SparseCoding.DEFAULT_MODEL_PARAMS[param])


    def learn_dictionary(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape((whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" %whitened_patches.ndim

        # learn dictionary
        self.DL_obj.fit(whitened_patches)


    def get_dictionary(self):
        try:
            return self.DL_obj.components_
        except AttributeError:
            raise AttributeError("Feature extraction dictionary has not yet been learnt for this model. " \
                                 + "Train the feature extraction model at least once to prevent this error.")


    def get_sparse_features(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape((whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" %whitened_patches.ndim
        try:
            sparse_code = self.DL_obj.transform(whitened_patches)
        except NotFittedError:
            raise NotFittedError("Feature extraction dictionary has not yet been learnt for this model, " \
                                 + "therefore Sparse Codes cannot be extracted. Train the feature extraction model " \
                                 + "at least once to prevent this error.")
        return sparse_code


    def get_sign_split_features(self, sparse_features):
        n_samples, n_components = sparse_features.shape
        sign_split_features = np.empty((n_samples, 2 * n_components))
        sign_split_features[:, :n_components] = np.maximum(sparse_features, 0)
        sign_split_features[:, n_components:] = -np.minimum(sparse_features, 0)
        return sign_split_features


    def get_pooled_features(self, input_feature_map, filter_size=(19,19)):
        # assuming square filters and images
        filter_side = filter_size[0]

        # reshaping incoming features from 2d to 3d i.e. (3249,20) to (57,57,20)
        input_feature_map_shape = input_feature_map.shape
        if input_feature_map.ndim == 2:
            input_feature_map_side = int(np.sqrt(input_feature_map.shape[0]))
            input_feature_map = input_feature_map.reshape((input_feature_map_side, input_feature_map_side, input_feature_map_shape[-1]))
        assert input_feature_map.ndim == 3, "Input features dimension is %d instead of 3" %input_feature_map.ndim

        # get windows (57,57,20) to (3,3,1,19,19,20)
        input_feature_map_windows = view_as_windows(input_feature_map,
                                                    window_shape=(filter_size[0], filter_size[1], input_feature_map.shape[-1]),
                                                    step=filter_size[0])

        # reshape windows (3,3,1,19,19,20) to (3**2, 19**2, 20) == (9, 361, 20)
        input_feature_map_windows = input_feature_map_windows.reshape((input_feature_map_windows.shape[0]**2,
                                                                       filter_size[0]**2,
                                                                       input_feature_map.shape[-1]))

        # calculate norms (9, 361, 20) to (9,361)
        input_feature_map_window_norms = np.linalg.norm(input_feature_map_windows, ord=2, axis=-1)

        # calculate indexes of max norms per window (9,361) to (9,1). One max index per window.
        max_norm_indexes = np.argmax(input_feature_map_window_norms, axis=-1)

        # max pooled features are the features that have max norm indexes (9, 361, 20) to (9,20). One max index per window.
        pooled_features = input_feature_map_windows[np.arange(input_feature_map_windows.shape[0]), max_norm_indexes]

        # return pooled feature map
        return pooled_features


    # Combined Pipeline
    def get_pooled_features_from_whitened_patches(self, whitened_patches):
        sparse_features = self.get_sparse_features(whitened_patches)
        sign_split_features = self.get_sign_split_features(sparse_features)
        pooled_features = self.get_pooled_features(sign_split_features)
        return pooled_features
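A usage sketch for the SparseCoding class above with random "whitened patches" (assumes the module-level imports it relies on, e.g. numpy as np and DictionaryLearning, are in place):

import numpy as np

patches = np.random.RandomState(0).randn(100, 8, 8)   # 100 patches, 8x8 = 64 features
sc = SparseCoding()
sc.learn_dictionary(patches)                           # fits the DictionaryLearning object
features = sc.get_sparse_features(patches)             # (100, 10) sparse codes
split = sc.get_sign_split_features(features)           # (100, 20) non-negative features
print(features.shape, split.shape)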
Example #27
def main():

    start = time.time()

    args = sys.argv
    target = args[1]
    sub = args[2]
    threshold = args[3]
    dimention = int(args[4])
    shift = int(args[5])
    sample = int(args[6])

    print('target : {}'.format(target))
    print('subject : {}'.format(sub))

    print('{} sec shift'.format(shift))

    # Load the brain activity data
    with open(
            '../data/Brain/' + target + '/' + sub + '_train_reduced_' +
            threshold + '.pickle', 'rb') as f:
        brain_data = pickle.load(f)

    # Load the semantic representation data
    with open('../data/srm/' + target + '_srm300_train.pickle', 'rb') as f:
        semantic_data = pickle.load(f)

    # Get the semantic representation matrix with the time shift applied
    brain_data, semantic_data = get_time_shift_data(brain_data, semantic_data,
                                                    target, sub, shift)

    print('brain sample : {}'.format(len(brain_data)))
    print('semantic_data : {}'.format(len(semantic_data)))

    # Build a combined matrix by concatenating the two
    brainw2vdata = np.c_[brain_data, semantic_data]
    brainw2vdata = np.array(brainw2vdata)

    brainw2vdata = brainw2vdata[::sample]

    print("次元:")
    print(brainw2vdata.shape)

    # Dictionary learning
    dict_model = DictionaryLearning(n_components=dimention,
                                    alpha=1.0,
                                    transform_algorithm='lasso_lars',
                                    transform_alpha=1.0,
                                    fit_algorithm='lars',
                                    verbose=True)
    dict_model.fit(brainw2vdata)

    # Dictionary
    Dict = dict_model.components_
    print("dictionary:")
    print(Dict.shape)

    # Coefficients
    coef = dict_model.transform(brainw2vdata)
    print("coefficients:")
    print(coef.shape)

    # Save the dictionary
    f = open(
        "../data/Dict/" + target + "/Dict_" + sub + "_pred" + threshold +
        "_base" + str(dimention) + "_sec" + str(shift) + "_sample" +
        str(sample) + ".pickle", "wb")
    pickle.dump(Dict, f)
    f.close()

    # Save the coefficients
    f = open(
        "../data/Dict/" + target + "/Coef_" + sub + "_pred" + threshold +
        "_base" + str(dimention) + "_sec" + str(shift) + "_sample" +
        str(sample) + ".pickle", "wb")
    pickle.dump(coef, f)
    f.close()

    # Print the elapsed time
    elapsed_time = time.time() - start
    print(("elapsed_time:{0}".format(elapsed_time)) + "[sec]")
Example #28

#Decomposition to classify with DictionaryLearning

from sklearn.decomposition import DictionaryLearning
dl = DictionaryLearning(3)
transformed = dl.fit_transform(iris_data[::2])
transformed[:5]
#array([[ 0. , 6.34476574, 0. ],
#[ 0. , 5.83576461, 0. ],
#[ 0. , 6.32038375, 0. ],
#[ 0. , 5.89318572, 0. ],
#[ 0. , 5.45222715, 0. ]])

#Next, let's transform (not fit_transform) the testing set:
transformed = dl.transform(iris_data[1::2])


#Putting it all together with Pipelines

#Let's briefly load the iris dataset and seed it with some missing values:
from sklearn.datasets import load_iris
import numpy as np
iris = load_iris()
iris_data = iris.data
mask = np.random.binomial(1, .25, iris_data.shape).astype(bool)
iris_data[mask] = np.nan
iris_data[:5]
#array([[ 5.1, 3.5, 1.4, nan],
#[ nan, 3. , 1.4, 0.2],
#[ 4.7, 3.2, 1.3, 0.2],
Example #29
# Select sample patches for training
ch = numpy.random.permutation(Ynoisy.shape[1])[:N]
Y = Ynoisy[:, ch].T
print(Y.shape)

# Training dictionary
from sklearn.decomposition import DictionaryLearning
dico = DictionaryLearning(n,
                          transform_algorithm='omp',
                          alpha=s,
                          random_state=0,
                          verbose=False)
dico.fit(Y)

# Testing the validity of the sparse representation
Xt = dico.transform(Y)
print(Xt.shape)
numpy.testing.assert_array_almost_equal(numpy.dot(Xt, dico.components_),
                                        Y,
                                        decimal=1)

# Generating sparse representation for entire image
Xc = dico.transform(Ynoisy.T)
print(Xc.T.shape)
# D * X
A = numpy.dot(Xc, dico.components_).T

# Inverse centering, image restoration and output
A = A + numpy.tile(Ymean, [Ynoisy.shape[0], 1])
Ic = col2im(A, (I.shape[0], I.shape[1]), (p, p))
show_bytes(Ic, "s_denoise_sliding_Ic.png")
Example #30
D = D_fixed
n_nonzero = 3
alpha = None
algo = 'omp'
color_1 = 'red'
title = algo.upper()

di = DictionaryLearning(n_components=n_components,
                        fit_algorithm='cd',
                        transform_algorithm='lasso_cd',
                        positive_code=True,
                        positive_dict=True)

di.fit(comp_matrix)

d = di.transform(comp_matrix)

coder_1 = SparseCoder(dictionary=d.T,
                      transform_n_nonzero_coefs=n_nonzero,
                      transform_alpha=alpha,
                      transform_algorithm=algo)

comps, acts = librosa.decompose.decompose(comp_matrix, transformer=coder_1)

plt.plot(comp_matrix[0, :],
         color='black',
         lw=2,
         linestyle='--',
         label='Original signal',
         alpha=0.5)
plt.plot(acts[0, :],
Example #31
else:  # a trained dictionary was pickled earlier, so load it back from disk
  with open(file_name, 'rb') as input:
    d = pickle.load(input)
  print("loaded dictionary")
  sparse_dict = np.transpose(d.components_)
  print("analyse pursuit")

  num_images_to_pursuit = 10
  mean_support=0
  average_element_size = 0
  all_support_coeffs = np.array([])
  for i in range(num_images_to_pursuit):

    idx = randint(0, x_train.shape[0])
    sparse_vec = d.transform(x_train[idx:idx+1,:])
    all_support_coeffs = np.append(all_support_coeffs, sparse_vec[sparse_vec!=0])
    mean_support += np.count_nonzero(sparse_vec)
    average_element_size += np.average(np.abs(sparse_vec[sparse_vec!=0]))
  print("mean support is "+ str(mean_support/num_images_to_pursuit))
  print("average_atom_coeff is " + str(average_element_size / num_images_to_pursuit))
  #plt.hist(all_support_coeffs,bins=100)
  #plt.show()
  thrs = [0,0.01,0.1,0.5,1,2]
  figs, axs = plt.subplots(num_images_to_pursuit, len(thrs)+1)
  axs[0][0].set_title('bla')

  for k in range(1,len(thrs)+1):
    axs[0][k].set_title("thr "+str(thrs[k-1]))

  for i in range(num_images_to_pursuit):
Example #32
class SparseCoding(object):
    def __init__(self, n, transform_algorithm='lars'):
        self.n = n
        self.net = DictionaryLearning(n_components=n, alpha=0.8, max_iter=1000)
        self.net.set_params(transform_algorithm=transform_algorithm)

    def plot_B(self, B):
        plt.figure(figsize=(4.2, 4))
        for i, comp in enumerate(B[:self.n]):
            plt.subplot(10, 10, i + 1)
            plt.imshow(comp, cmap=plt.cm.gray_r, interpolation='nearest')
            plt.xticks(())
            plt.yticks(())

        plt.suptitle('Dictionary learned from time series\n' +
                     'Train time %.1fs on %d patches' % (dt, len(data)),
                     fontsize=16)

        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def _init(self):
        a = np.random.random((self.n, self.m))
        b = np.random.random((self.T, self.n))
        b /= sum(b)
        return a, b

    def init_weights(self, X_mat):
        B, A, recon = [], [], []
        for app in X_mat:
            data = X_mat[app].reshape(1, -1)
            B_i = self.net.fit(data).components_
            A_i = self.net.transform(data)
            X_hat = np.dot(A_i, B_i)

            B.append(B_i)
            A.append(A_i)
            recon.append(X_hat)

            print("MSE Error: ", np.mean((data - X_hat)**2))

        return A, B, recon

    def DiscriminativeDisaggregation(self, appliances, B, A):

        x = np.array([appliances[app] for app in appliances])
        x = x.T

        A_star = np.vstack(A)
        B_cat = np.hstack(B)
        change = 1
        t = 0

        print(A_star.shape)
        print(B_cat.shape)

        while t <= self.steps and self.epsilon <= change:
            B_cat_p = B_cat
            acts = self.F(x, B_cat, A=A_star)
            B_cat = (B_cat - self.alpha *
                     ((x - B_cat.dot(acts)).dot(acts.T) -
                      (x - B_cat.dot(A_star)).dot(A_star.T)))
            B_cat = self._pos_constraint(B_cat)
            B_cat /= sum(B_cat)

            t += 1
            change = np.linalg.norm(B_cat - B_cat_p)
            print("Change is {} and step is {} ".format(change, t))

        return B_cat

    def F(self, x, B, x_train=None, A=None, rp_tep=False, rp_gl=False):
        B = np.asarray(B)
        A = np.asarray(A)
        coder = SparseCoder(dictionary=B.T,
                            transform_alpha=self.rp,
                            transform_algorithm='lasso_cd')

        comps, acts = librosa.decompose.decompose(x, transformer=coder)
        acts = self._pos_constraint(acts)
        return acts

    def predict(self, A, B):
        print(A.shape)
        print(B.shape)

        return B.dot(A)
Example #33
class SparseCoding:

    DEFAULT_MODEL_PARAMS = {
        'n_components': 10,
        'n_features': 64,
        'max_iter': 5,
        'random_state': 1,
        'dict_init': None,
        'code_init': None
    }

    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS[
                'n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS[
                'random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

            # initialize Dictionary Learning object with default params and weights
            self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                             alpha=1,
                                             max_iter=self.max_iter,
                                             tol=1e-08,
                                             fit_algorithm='lars',
                                             transform_algorithm='omp',
                                             transform_n_nonzero_coefs=None,
                                             transform_alpha=None,
                                             n_jobs=1,
                                             code_init=self.code_init,
                                             dict_init=self.dict_init,
                                             verbose=False,
                                             split_sign=False,
                                             random_state=self.random_state)

    def save_model(self, filename):
        # save DL object to file, compress is also to prevent multiple model files.
        joblib.dump(self.DL_obj, filename, compress=3)

    def load_model(self, filename):
        # load DL Object from file
        self.DL_obj = joblib.load(filename)

        # set certain model params as class attributes. Get values from DL Obj.get_params() or use default values.
        DL_params = self.DL_obj.get_params()
        for param in SparseCoding.DEFAULT_MODEL_PARAMS:
            if param in DL_params:
                setattr(self, param, DL_params[param])
            else:
                setattr(self, param, SparseCoding.DEFAULT_MODEL_PARAMS[param])

    def learn_dictionary(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape(
                (whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim

        # learn dictionary
        self.DL_obj.fit(whitened_patches)

    def get_dictionary(self):
        try:
            return self.DL_obj.components_
        except AttributeError:
            raise AttributeError("Feature extraction dictionary has not yet been learnt for this model. " \
                                 + "Train the feature extraction model at least once to prevent this error.")

    def get_sparse_features(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape(
                (whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim
        try:
            sparse_code = self.DL_obj.transform(whitened_patches)
        except NotFittedError:
            raise NotFittedError("Feature extraction dictionary has not yet been learnt for this model, " \
                                 + "therefore Sparse Codes cannot be extracted. Train the feature extraction model " \
                                 + "at least once to prevent this error.")
        return sparse_code

    def get_sign_split_features(self, sparse_features):
        n_samples, n_components = sparse_features.shape
        sign_split_features = np.empty((n_samples, 2 * n_components))
        sign_split_features[:, :n_components] = np.maximum(sparse_features, 0)
        sign_split_features[:, n_components:] = -np.minimum(sparse_features, 0)
        return sign_split_features

    def get_pooled_features(self, input_feature_map, filter_size=(19, 19)):
        # assuming square filters and images
        filter_side = filter_size[0]

        # reshaping incoming features from 2d to 3d i.e. (3249,20) to (57,57,20)
        input_feature_map_shape = input_feature_map.shape
        if input_feature_map.ndim == 2:
            input_feature_map_side = int(np.sqrt(input_feature_map.shape[0]))
            input_feature_map = input_feature_map.reshape(
                (input_feature_map_side, input_feature_map_side,
                 input_feature_map_shape[-1]))
        assert input_feature_map.ndim == 3, "Input features dimension is %d instead of 3" % input_feature_map.ndim

        # get windows (57,57,20) to (3,3,1,19,19,20)
        input_feature_map_windows = view_as_windows(
            input_feature_map,
            window_shape=(filter_size[0], filter_size[1],
                          input_feature_map.shape[-1]),
            step=filter_size[0])

        # reshape windows (3,3,1,19,19,20) to (3**2, 19**2, 20) == (9, 361, 20)
        input_feature_map_windows = input_feature_map_windows.reshape(
            (input_feature_map_windows.shape[0]**2, filter_size[0]**2,
             input_feature_map.shape[-1]))

        # calculate norms (9, 361, 20) to (9,361)
        input_feature_map_window_norms = np.linalg.norm(
            input_feature_map_windows, ord=2, axis=-1)

        # calculate indexes of max norms per window (9,361) to (9,1). One max index per window.
        max_norm_indexes = np.argmax(input_feature_map_window_norms, axis=-1)

        # max pooled features are the features that have max norm indexes (9, 361, 20) to (9,20). One max index per window.
        pooled_features = input_feature_map_windows[
            np.arange(input_feature_map_windows.shape[0]), max_norm_indexes]

        # return pooled feature map
        return pooled_features

    # Combined Pipeline
    def get_pooled_features_from_whitened_patches(self, whitened_patches):
        sparse_features = self.get_sparse_features(whitened_patches)
        sign_split_features = self.get_sign_split_features(sparse_features)
        pooled_features = self.get_pooled_features(sign_split_features)
        return pooled_features
Example #34
import numpy as np
import scipy.io
import pandas as pd
import dill
from sklearn.decomposition import DictionaryLearning

N_COMPONENTS               =  500
TRANSFORM_N_NONZERO_COEFS  =   10
VERBOSE                    =  True
MAX_ITER                   =   10

MatBrainImage=scipy.io.loadmat(r"C:\Users\ktmks\Documents\research\tmp_results\for_python_data\brain_f_data.mat")

label=MatBrainImage["label"]
Y=MatBrainImage["data"]

dic=DictionaryLearning(n_components              =              N_COMPONENTS,
                       transform_n_nonzero_coefs = TRANSFORM_N_NONZERO_COEFS,
                       verbose                   =                   VERBOSE,
                       max_iter                  =                  MAX_ITER 
                       )
dic.fit(Y)
D=dic.components_
X=dic.transform(Y)
Y_=np.dot(X,D)

filepath = r"C:\Users\ktmks\Documents\research\Python\Brain_DL"+"\\"
filename = "res_"+"AtomN-"   + str(N_COMPONENTS)\
          +"_SparseDegree-"  + str(TRANSFORM_N_NONZERO_COEFS)\
          +"_MaxIter-"       + str(MAX_ITER)
save_filename=filepath+filename+".pkl"

dill.dump_session(save_filename)
scipy.io.savemat(filename+".mat",{"D":D,"X":X,"Y_":Y_,"label":label})
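The dumped session can be restored later; a minimal sketch, assuming the .pkl file written by dill.dump_session above exists at the same path.

import dill

dill.load_session(r"C:\Users\ktmks\Documents\research\Python\Brain_DL\res_AtomN-500_SparseDegree-10_MaxIter-10.pkl")
print(D.shape, X.shape)   # D, X, Y_, label are restored into the namespace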