def test_sparse_encode_input():
    n_components = 100
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    Xf = check_array(X, order='F')
    for algo in ('lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'):
        a = sparse_encode(X, V, algorithm=algo)
        b = sparse_encode(Xf, V, algorithm=algo)
        assert_array_almost_equal(a, b)
def test_with_sparse_code(components=np.loadtxt('components_of_convfeat.txt')):
    (X_train, y_train), (X_test, y_test) = util.load_feat_vec()
    X_train_codes = np.loadtxt('sparse_codes_of_convfeat.txt')
    clf = LogisticRegression(penalty='l1', multi_class='ovr')
    clf.fit(X_train_codes, y_train)
    X_test_codes = sparse_encode(X_test, components)
    print("mean accuracy", clf.score(X_test_codes, y_test))
Example #3
def test_sparse_encode_error():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    code = sparse_encode(X, V, alpha=0.001)
    assert_true(not np.all(code == 0))
    assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
Example #4
def run(dimension,raw_data_dir,out_dir):
	with open('{}/filename.list'.format(raw_data_dir), 'r') as fp:
		filenames = fp.read().splitlines()
	sensor_data = list()
	for filename in filenames:
		path = '{}/{}'.format(raw_data_dir, filename)
		with Timer('open {} with ALL sensors'.format(filename)):
			#data = np.genfromtxt(path, usecols=range(1,49)
			data = np.genfromtxt(path, usecols=[1, 4, 13, 16, 18, 26, 31, 32, 37, 38, 39, 40, 9, 11, 22, 23, 41, 10, 12, 24, 25, 29, 30, 42, 43, 44]
				, delimiter=',').tolist()
			print("# of data:", len(data))
			sensor_data.extend(data)
	with Timer('Sparse Coding...'):
		print("# of ALL data as a whole:", len(sensor_data))
		dl = sparse_coding(dimension, sensor_data,out_dir, 1, 10000, 0.00001)
	with open('{}/atoms'.format(out_dir), "w") as op:
		for component in dl.components_:
			line = ', '.join(str(e) for e in component)
			op.write(line + '\n')

	code = sparse_encode(input_x, dl.components_)

	with open('{}/codes'.format(out_dir), "w") as op:
		for coefficient in code:
			line = ', '.join(str(e) for e in coefficient)
			op.write(line + '\n')

	with open('{}/filename.list'.format(raw_data_dir), 'r') as fp:
		filenames = fp.read().splitlines()
Example #5
def test_sparse_encode_shapes():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    for algo in ('lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'):
        code = sparse_encode(X, V, algorithm=algo)
        assert_equal(code.shape, (n_samples, n_components))
def test_sparse_encode_positivity(positive):
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    for algo in ('lasso_lars', 'lasso_cd', 'lars', 'threshold'):
        code = sparse_encode(X, V, algorithm=algo, positive=positive)
        if positive:
            assert_true((code >= 0).all())
        else:
            assert_true((code < 0).any())

    try:
        sparse_encode(X, V, algorithm='omp', positive=positive)
    except ValueError:
        if not positive:
            raise
def test_sparse_encode_shapes_omp():
    rng = np.random.RandomState(0)
    algorithms = ['omp', 'lasso_lars', 'lasso_cd', 'lars', 'threshold']
    for n_components, n_samples in itertools.product([1, 5], [1, 9]):
        X_ = rng.randn(n_samples, n_features)
        dictionary = rng.randn(n_components, n_features)
        for algorithm, n_jobs in itertools.product(algorithms, [1, 3]):
            code = sparse_encode(X_, dictionary, algorithm=algorithm,
                                 n_jobs=n_jobs)
            assert_equal(code.shape, (n_samples, n_components))
def to_sparse(X,dim):

	sparse_dict = MiniBatchDictionaryLearning(dim)
	sparse_dict.fit(X)
	sparse_vectors = sparse_encode(X, sparse_dict.components_)

	for i in sparse_vectors:
		print(i)

	return sparse_vectors
    def predict(self, imgs, neuron_idx=None, penalty_lambda=None, algorithm=None):
        """ get neuron response to images

        Parameters
        ----------
        imgs
            input images; converted internally to a 2-d matrix with one image per row.
        neuron_idx : int, optional
            if given, use only this row of the dictionary ``self.w``.
        penalty_lambda : float, optional
            sparsity penalty; defaults to ``self._lambda``.
        algorithm : str, optional
            one of 'lasso_lars', 'lasso_cd' or 'spams'; defaults to ``self.algorithm``.

        Returns
        -------
        response : ndarray, shape (n_images, n_dictionary_rows)
            sparse coefficients of each image under the dictionary.
        """
        imgs_array = make_2d_input_matrix(imgs)
        if neuron_idx is None:
            dict_to_use = self.w
        else:
            dict_to_use = self.w[neuron_idx:(neuron_idx + 1), :]

        if penalty_lambda is None:
            _lambda = self._lambda
        else:
            _lambda = penalty_lambda
        assert np.isscalar(_lambda)

        if algorithm is None:
            _algorithm = self.algorithm
        else:
            _algorithm = algorithm


        # let's call sparse encoder to do it!
        # no scaling at all!
        # having /nsample in the objective function is exactly the same as solving each problem separately.
        # the underlying function called is elastic net, and that function fits each column of y separately.
        # each column of y is each stimulus. This is because when passing imgs_array and dict_to_use to Elastic Net,
        # they are transposed. That is, y = imgs_array.T
        #
        # in the code there's also a subtle detail, where alpha is divided by the number of pixels in each stimulus.
        # I haven't fully worked out why, but it seems to be a detail of using ElasticNet for this purpose.
        if _algorithm in ['lasso_lars', 'lasso_cd']:
            response = sparse_encode(imgs_array, dict_to_use, alpha=_lambda, algorithm=_algorithm, max_iter=10000)
        else:
            assert _algorithm == 'spams'
            #print(imgs_array.dtype, dict_to_use.dtype, _lambda.shape)
            response = lasso(np.asfortranarray(imgs_array.T), D=np.asfortranarray(dict_to_use.T), lambda1=_lambda,
                             mode=2)
            response = response.T.toarray()  # because lasso returns sparse matrix...
        # this can be used for debugging, for comparison with SPAMS.
        # notice here I give per sample cost.
        self.last_cost_recon = 0.5 * np.sum((imgs_array - np.dot(response, dict_to_use)) ** 2, axis=1)
        self.last_cost_sparsity = _lambda * np.abs(response).sum(axis=1)
        assert self.last_cost_sparsity.shape == (imgs_array.shape[0], )
        assert self.last_cost_recon.shape == (imgs_array.shape[0],)
        self.last_cost = np.mean(self.last_cost_recon + self.last_cost_sparsity)

        return response
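
The comment block in predict() above states that, for the 'lasso_lars' and 'lasso_cd' paths, sparse_encode fits every stimulus independently and hands the underlying Lasso an alpha divided by the number of pixels (features). The following is a minimal sketch checking that claim with scikit-learn only; the array names and sizes are made up for illustration and are not part of the class above.

import numpy as np
from sklearn.decomposition import sparse_encode
from sklearn.linear_model import Lasso

rng = np.random.RandomState(0)
n_samples, n_features, n_atoms = 5, 64, 16
imgs_array = rng.randn(n_samples, n_features)   # one stimulus per row
dict_to_use = rng.randn(n_atoms, n_features)    # one dictionary element per row
lam = 0.1

# sparse_encode solves all stimuli in one call ...
code = sparse_encode(imgs_array, dict_to_use, algorithm='lasso_cd',
                     alpha=lam, max_iter=10000)

# ... which should match per-stimulus Lasso with alpha scaled by n_features,
# using the transposed dictionary as the design matrix (y = imgs_array.T).
lasso = Lasso(alpha=lam / n_features, fit_intercept=False, max_iter=10000)
manual = np.vstack([lasso.fit(dict_to_use.T, x).coef_ for x in imgs_array])

print(np.abs(code - manual).max())  # expected to be close to zero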
Example #10
    def test_sparse_encode(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        _, dictionary, _ = decomposition.dict_learning(iris.data, 2, 1,
                                                       random_state=self.random_state)

        result = df.decomposition.sparse_encode(dictionary)
        expected = decomposition.sparse_encode(iris.data, dictionary)
        self.assertIsInstance(result, pdml.ModelFrame)
        tm.assert_index_equal(result.index, df.data.index)
        self.assert_numpy_array_almost_equal(result.values, expected)
def sparse_coding(n_atom, input_x, out_dir):
	dictionary = get_dictionary(n_atom, input_x)
	code = sparse_encode(input_x, dictionary)
    
	np.set_printoptions(precision=3, suppress=True)
	#print code
	#print dictionary
	with open('{}/atoms'.format(out_dir), "w") as op:
		for component in dictionary:
			line = ', '.join(str(round(e,3)) for e in component)
			op.write(line + '\n')
	with open('{}/codes'.format(out_dir), "w") as op:
		for coefficient in code:
			line = ', '.join(str(round(e,3)) for e in coefficient)
			op.write(line + '\n')
	return code
def learning_sparse_coding(X, components=None):
    """
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.DictionaryLearning.html
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.sparse_encode.html
    """
    if components is None:
        print('Learning the dictionary...')
        t0 = time()
        diclearner = MiniBatchDictionaryLearning(n_components=100, verbose=True)
        components = diclearner.fit(X).components_
        np.savetxt('components_of_convfeat.txt', components)
        dt = time() - t0
        print('done in %.2fs.' % dt)

    codes = sparse_encode(X, components)
    np.savetxt('sparse_codes_of_convfeat.txt', codes)
Example #13
def test_dict_learning_online_partial_fit():
    # this test was not actually passing before!
    raise SkipTest
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    dico1 = MiniBatchDictionaryLearning(n_components, n_iter=10, batch_size=1,
                                        shuffle=False, dict_init=V,
                                        random_state=0).fit(X)
    dico2 = MiniBatchDictionaryLearning(n_components, n_iter=1, dict_init=V,
                                        random_state=0)
    for ii, sample in enumerate(X):
        dico2.partial_fit(sample, iter_offset=ii * dico2.n_iter)
        # if ii == 1: break
    assert_true(not np.all(sparse_encode(X, dico1.components_, alpha=100) ==
                           0))
    assert_array_equal(dico1.components_, dico2.components_)
Example #14
def test_dict_learning_online_partial_fit():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    dict1 = MiniBatchDictionaryLearning(n_components, n_iter=10 * len(X),
                                        batch_size=1,
                                        alpha=1, shuffle=False, dict_init=V,
                                        random_state=0).fit(X)
    dict2 = MiniBatchDictionaryLearning(n_components, alpha=1,
                                        n_iter=1, dict_init=V,
                                        random_state=0)
    for i in range(10):
        for sample in X:
            dict2.partial_fit(sample[np.newaxis, :])

    assert not np.all(sparse_encode(X, dict1.components_, alpha=1) == 0)
    assert_array_almost_equal(dict1.components_, dict2.components_,
                              decimal=2)
Example #15
    def gabor_encode(self):
        patches = extract_patches_2d(
            self.img, (self.patch_size, self.patch_size)
        )
        patches = patches.reshape(patches.shape[0], -1)
        # code = sparse_encode(patches, self.kernels, algorithm='threshold', alpha=1)
        code = sparse_encode(
            patches, self.kernels, algorithm='lars', n_nonzero_coefs=2)

        idx = np.std(code, axis=1) > 0.3
        selected_patches = patches #[idx]
        selected_code = code #[idx]
        min_code, max_code = np.min(selected_code), np.max(selected_code)
        # print selected_patches
        c = 1  # subplot indices are 1-based
        s = 21
        for i in range(selected_code.shape[0]):
            print(i)

            plt.subplot(s, s * 2, c)
            plt.xticks(())
            plt.gca().set_ylim([min_code, max_code])
            plt.yticks(())
            plt.plot(selected_code[i])
            c += 1

            plt.subplot(s, s * 2, c)
            plt.xticks(())
            plt.yticks(())
            plt.imshow(selected_patches[i].reshape(
                self.patch_size, self.patch_size), cmap='gray', interpolation='none')
            c += 1
        plt.show()

        orientations = np.argmax(code, axis=1)
        activations = np.std(code, axis=1)
        orientations[activations < self.activation_threshold] = self.zero_value
        # blank_batches = np.ones((patches.shape[0], self.patch_size, self.patch_size)) * orientations[:, None, None]
        # recon = reconstruct_from_patches_2d(blank_batches, (self.img_height, self.img_width))
        # return recon
        return orientations.reshape(self.map_height, self.map_width)
Example #16
def FindTopSCV(k, dic, Fout2, prompt):
    sh = (Fout2.shape[0], Fout2.shape[1], k, 3)
    cplist = np.zeros(sh)
    if prompt == 'SP':
        for j in range(Fout2.shape[1]):
            for i in range(Fout2.shape[0]):
                y = np.reshape(Fout2[i,j], (Fout2.shape[2],1))
                if y.all() == 0:
                    p = np.zeros(k)
                    p[0] = 1
                    lc = np.zeros((k,2))
                    lc[0,:] = [20,20]
                else:
                    try:
                        x_hat = CSRec_SP(k, dic, y)
                        (p,lc) = prob(k, x_hat, Fout2.shape[0])
                    except:
                        p = np.zeros(k)
                        p[0] = 1
                        lc = np.zeros((k,2))
                        lc[0,:] = [20,20]
                cplist[i,j,:,0] = p
                cplist[i,j,:,1:3] = lc
                print (i,j)
    elif prompt == 'OMP':
        for j in range(Fout2.shape[1]):
            for i in range(Fout2.shape[0]):
                y = np.reshape(Fout2[i,j], (Fout2.shape[2],1))
                # X = code * dic
                y = np.reshape(Fout2,(Fout2.shape[0]*Fout2.shape[1],Fout2.shape[2]))
                x_hat = sparse_encode(X = y, dictionary=dic.transpose(), n_nonzero_coefs=k)
                x_hat = x_hat.transpose()
                (p,lc) = prob(k, x_hat, Fout2.shape[0])
                cplist[i,j,:,0] = p
                cplist[i,j,:,1:3] = lc
                print (i,j)

    return cplist
def fft_handler(*args):
    global current_note_fft
    print(len(current_note_fft))
    fft = args[1].split()
    fft = np.array([float(i) for i in fft])
    n = normalize_vector(fft.reshape(1, -1))[0]
    if n is None:
        return
    current_note_fft += [n]
    if len(current_note_fft) == 10:
        s = sparse_encode(n.reshape(1, -1),
                          data_per_fret,
                          algorithm='lars',
                          n_nonzero_coefs=NONZERO_COEFS)
        s = s[0]
        a = np.argsort(s)
        coeffs = [s[i] for i in a[-NONZERO_COEFS:]]
        coeffs = normalize_vector(np.array(coeffs))[0]
        pitches = [guitar_notes[i] for i in a[-NONZERO_COEFS:]]
        print(pitches)
        print(coeffs)
        d = get_relevant_pitches(pitches, coeffs)
        sendMIDI_out(d)
Example #18
def test_dict_learning_online_partial_fit():
    # this test was not actually passing before!
    raise SkipTest("Online dict-learning test fails.")
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V**2, axis=1)[:, np.newaxis]
    dico1 = MiniBatchDictionaryLearning(n_components,
                                        n_iter=10,
                                        batch_size=1,
                                        shuffle=False,
                                        dict_init=V,
                                        random_state=0).fit(X)
    dico2 = MiniBatchDictionaryLearning(n_components,
                                        n_iter=1,
                                        dict_init=V,
                                        random_state=0)
    for ii, sample in enumerate(X):
        dico2.partial_fit(sample, iter_offset=ii * dico2.n_iter)
        # if ii == 1: break
    assert_true(not np.all(
        sparse_encode(X, dico1.components_, alpha=100) == 0))
    assert_array_equal(dico1.components_, dico2.components_)
Example #19
def test_dict_learning_online_partial_fit():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V**2, axis=1)[:, np.newaxis]
    dict1 = MiniBatchDictionaryLearning(
        n_components,
        n_iter=10 * len(X),
        batch_size=1,
        alpha=1,
        shuffle=False,
        dict_init=V,
        random_state=0,
    ).fit(X)
    dict2 = MiniBatchDictionaryLearning(
        n_components, alpha=1, n_iter=1, dict_init=V, random_state=0
    )
    for i in range(10):
        for sample in X:
            dict2.partial_fit(sample[np.newaxis, :])

    assert not np.all(sparse_encode(X, dict1.components_, alpha=1) == 0)
    assert_array_almost_equal(dict1.components_, dict2.components_, decimal=2)
Example #20
print("X_train.shape", train_X.shape)
print("Components shape", dl.components_.shape)

# components = dl.components().reshape((n_components, n_features))
components = dl.components_

# Visualizing the components as images
component_titles = ["component %d" % i for i in range(components.shape[0])]
plot_gallery("Visualizing top components", components, component_titles, w, h, n_row=n_components // 10, n_col=10)
plt.show()

###############################################################################
# Sparse Encoding
print("\nSparse Encoding")
train_X_pca = np.zeros((len(train_X), n_components))
train_X_pca = sparse_encode(train_X[0:10], components, alpha=10, algorithm='omp')
np.set_printoptions(precision=3, suppress=True)
print(train_X_pca)
# for i in range(len(train_X)):
#     train_X_pca[i] = dl.transform(train_X[i])

test_X_pca = np.zeros((len(test_X), n_components))
test_X_pca = sparse_encode(test_X[0:10], components, alpha=10, algorithm='omp')
# for i in range(len(test_X)):
#     test_X_pca[i] = dl.transform(test_X[i])

print("train_X_pca.shape", train_X_pca.shape)

###############################################################################
# Visualize reconstructed images
reconstructed_X = np.zeros((20, n_features))
Example #21
    plt.plot(trajectory['x'], trajectory['y'])
    trajectory['x'] = []
    trajectory['y'] = []

plt.show()

alpha_schedule = [.2 / 5000., .5 / 5000., 1. / 5000., 2. / 5000., 5. / 5000.]

assert num_trajectories == len(trajectories)

for j, alpha in enumerate(alpha_schedule):
    print('j = ', j, '; alpha = ', alpha)
    from sklearn.decomposition import sparse_encode
    print('running SC ', j)
    HS = sparse_encode(model.W.get_value(),
                       X.T,
                       alpha=alpha,
                       algorithm='lasso_cd').T
    assert HS.shape == (5000, 1600)
    print('done encoding')

    HS = np.abs(HS)

    if np.any(np.isnan(HS)):
        print('has nans')

    if np.any(np.isinf(HS)):
        print('has infs')

    print('HS shape ', HS.shape)
    print('HS subtensor shape ', HS[0:num_trajectories].shape)
    act_prob = (HS[:, 0:num_trajectories] > .01).mean(axis=0)
Example #22
def learn_representation_for_labeled_data(labeled_examples, dictionary, max_iter):
    return sparse_encode(labeled_examples, dictionary, max_iter=max_iter)
Example #23
File: test.py  Project: wyx0722/thesis
    def test_sparse_encode(self):
        """Test the sparse encode using admm behaves like sklearn's sparse_encode.

        After testing, we found that the order of the equations is reversed.
        Here is the problem that sparse_encode tries to solve:
                C* = argmin_C  0.5 * || X - C D ||_2^2 + gamma * || C ||_1

        And here is the one that lasso_admm tries to solve:
                C* = argmin_C  0.5 * || X - D C ||_2^2 + gamma * || C ||_1
                where D is the dictionary.

        The best way to compare them is to transpose EVERYTHING:
        X = C D
        and
        X_T = D_T C_T
        """
        from sklearn.decomposition import sparse_encode

        alpha = 1
        n_components = 6
        X = np.array([[
            1.76405235, 0.40015721, 0.97873798, 2.2408932, 1.86755799,
            -0.97727788, 0.95008842, -0.15135721
        ],
                      [
                          -0.10321885, 0.4105985, 0.14404357, 1.45427351,
                          0.76103773, 0.12167502, 0.44386323, 0.33367433
                      ],
                      [
                          1.49407907, -0.20515826, 0.3130677, -0.85409574,
                          -2.55298982, 0.6536186, 0.8644362, -0.74216502
                      ],
                      [
                          2.26975462, -1.45436567, 0.04575852, -0.18718385,
                          1.53277921, 1.46935877, 0.15494743, 0.37816252
                      ],
                      [
                          -0.88778575, -1.98079647, -0.34791215, 0.15634897,
                          1.23029068, 1.20237985, -0.38732682, -0.30230275
                      ],
                      [
                          -1.04855297, -1.42001794, -1.70627019, 1.9507754,
                          -0.50965218, -0.4380743, -1.25279536, 0.77749036
                      ],
                      [
                          -1.61389785, -0.21274028, -0.89546656, 0.3869025,
                          -0.51080514, -1.18063218, -0.02818223, 0.42833187
                      ],
                      [
                          0.06651722, 0.3024719, -0.63432209, -0.36274117,
                          -0.67246045, -0.35955316, -0.81314628, -1.7262826
                      ],
                      [
                          0.17742614, -0.40178094, -1.63019835, 0.46278226,
                          -0.90729836, 0.0519454, 0.72909056, 0.12898291
                      ],
                      [
                          1.13940068, -1.23482582, 0.40234164, -0.68481009,
                          -0.87079715, -0.57884966, -0.31155253, 0.05616534
                      ]])

        # start with sensible defaults
        dictionary = init_dictionary(X, n_components=n_components)

        code_sklearn = sparse_encode(X, dictionary, alpha=alpha)

        code_admm_T, costs = lasso_admm(X.T, dictionary.T, gamma=alpha)

        code_admm = code_admm_T.T

        # Compare the costs of the two solutions.
        cost_sklearn = lasso_cost(X.T, dictionary.T, code_sklearn.T, alpha)
        cost_admm = lasso_cost(X.T, dictionary.T, code_admm.T, alpha)

        # Make sure admm matches the lars-based sklearn solution
        np.testing.assert_array_almost_equal(code_admm, code_sklearn)
        np.testing.assert_almost_equal(cost_admm, cost_sklearn)
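
The docstring above compares the two formulations by transposing everything. As a small illustration of the row convention that sparse_encode uses (X is approximated by C D, so X.T is approximated by D.T C.T), here is a self-contained sketch with arbitrary toy sizes; it only assumes NumPy and scikit-learn:

import numpy as np
from sklearn.decomposition import sparse_encode

rng = np.random.RandomState(0)
D = rng.randn(6, 8)           # dictionary, shape (n_components, n_features)
C_true = rng.randn(10, 6)     # codes, shape (n_samples, n_components)
X = C_true @ D                # data generated exactly from the dictionary

C = sparse_encode(X, D, alpha=0.01)

# Row convention: X is approximated by C D; transposing both sides gives
# X.T ~= D.T C.T, which is the form solved by lasso_admm in the test above.
print(np.abs(X - C @ D).max())        # small for a small alpha
print(np.abs(X.T - D.T @ C.T).max())  # identical residual, just transposed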
Example #24
	def bow_feature_extract(self, path):
		des = self.raw_feature_extract(path)
		out = sum(sparse_encode(des, self.mbdl.components_))
		out = np.array([out])
		return out
    def transform(self, sample):
        return sparse_encode(sample.T, self.dictionary.T,
                             algorithm='omp', n_nonzero_coefs=self.n_nonzero).T
    fmri_masked = fmri_masked[:, np.all(fmri_masked != 0, axis=0)]

    # DEFINING features and targets
    features = fmri_masked
    targets = target_int

    # Dictionary Learning on Target
    sparse_components = 200
    dict_sparse = DictionaryLearning(alpha=1,
                                     n_components=sparse_components,
                                     max_iter=3,
                                     verbose=3)
    dict_sparse.fit(features)
    Dt_0 = dict_sparse.components_
    Rt_0 = sparse_encode(features, dictionary=Dt_0)

    # Dictionary Learning on Source iter 2
    sparse_components = 300
    dict_sparse = MiniBatchDictionaryLearning(alpha=1,
                                              n_components=sparse_components,
                                              verbose=3,
                                              batch_size=10,
                                              n_iter=200)
    dict_sparse.fit(Rs_0)
    Ds_1 = dict_sparse.components_
    #Rs_1 = sparse_encode(Rs_0,dictionary=Ds_1)
    Rt_1 = sparse_encode(Rt_0, dictionary=Ds_1)

    run_range = range(12)
    feat_range = range(756)
Example #27
print("X_train.shape", train_X.shape)
print("Components shape", dl.components_.shape)

# components = dl.components().reshape((n_components, n_features))
components = dl.components_

# Visualizing the components as images
component_titles = ["%d" % i for i in range(components.shape[0])]
plot_gallery("Visualizing top components", components, w, h, n_row=n_components // 10, n_col=10)
plt.show()

###############################################################################
# Sparse Encoding
print("\nSparse Encoding")
train_X_sc = np.zeros((10, n_components))
train_X_sc = sparse_encode(train_X, components, algorithm='lars')
np.set_printoptions(precision=1, suppress=False, linewidth=800)

test_X_sc = np.zeros((len(test_X), n_components))
test_X_sc = sparse_encode(test_X, components, algorithm='lars')

print("train_X_sc.shape", train_X_sc.shape)

###############################################################################
# Visualize reconstructed images
reconstructed_X = np.zeros((20, n_features))
reconstructed_X_idx = np.random.choice(np.arange(len(reconstructed_X)), size=10, replace=False)
reconstructed_X[reconstructed_X_idx] = train_X[reconstructed_X_idx]
reconstructed_X[reconstructed_X_idx] = np.dot(train_X_sc[reconstructed_X_idx], components)

print("reconstructed_X.shape", reconstructed_X.shape)
    
    a = sd.getData(256, return_decoded = True)
    
    
    axmin = np.min(a[0])-0.1
    axmax = np.max(a[0])+0.1
    for i in range(10):
        
        ax1.plot(np.linspace(start = 0, stop = sd.num_features, num = sd.num_features),a[0][a[1]==0][i].reshape(-1))
        ax1.set_ylim([axmin, axmax])
    print(a[0][a[1]==1].shape)
    
    for i in range(10):
        ax2.plot(np.linspace(start = 0, stop = sd.num_features, num = sd.num_features),a[0][a[1]==1][i].reshape(-1))
        ax2.set_ylim([axmin, axmax])
        
    plt.show()
    
    
    ###
    recoded = sparse_encode(X=decoded, dictionary=dictionary, n_nonzero_coefs= 20, alpha = 0.001)
    print("recoded.shape", recoded.shape)
    for i in range(num_datapoints):
        print("code/recode:")
        for k in range(num_codewords):
            print(round(codes[i,k],3), "   ",  round(recoded[i,k],3))
    ###
    
    
#def generate_codes(length):
    
Example #29
# scipy.io.savemat('/home/jonny2/PycharmProjects/ML-algorithms/Projects/GWAS-SparseCoding/psychiatric.mat',
#                  mdict={'tr_dat': X_train, 'tt_dat': X_test, 'trls': y_train, 'ttls': y_test})


###############################################################################
# Sparse Representation
n_components = 25

# dl = DictionaryLearning(n_components, max_iter=15, n_jobs=4, verbose=2)
dl = KSVDSparseCoding(n_components, max_iter=5, verbose=1, approx=True)
dl.fit(X_s)

eigenfaces = dl.components_.T

print("Projecting the input data on the learned dictionary bases")
X_train_pca = sparse_encode(X_train, eigenfaces, algorithm='lasso_lars')
X_test_pca = sparse_encode(X_test, eigenfaces, algorithm='lasso_lars')

print("X_train_pca.shape", X_train_pca.shape)
print("X_test_pca.shape", X_test_pca.shape)

###############################################################################
# Train a SVM classification model
print("Fitting the classifier to the training set")
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],}
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf = clf.fit(X_train_pca, y_train)
print("Best estimator found by grid search:")
print(clf.best_estimator_)
Example #30
File: bug.py  Project: cc13ny/galatea
import numpy as np
from sklearn.decomposition import sparse_encode
HS = sparse_encode( np.random.randn(108,1600), np.random.randn(108,5000), alpha = 1./5000., algorithm='lasso_lars').T

Example #31
def plot_reconstruction_detail(shape, dictionary, n_components, scaled=False,
                               algorithm='omp', sorted=True, show_points=False, show_error=True,
                               figsize=None):
    # Compute reconstruction for given shape
    xy = len(shape) // 2
    if algorithm in ('omp', 'lars'):
        coefs = sparse_encode(shape[np.newaxis, :], dictionary, algorithm=algorithm,
                              n_nonzero_coefs=n_components, )
        recons = np.dot(coefs, dictionary)[0]
    elif algorithm == 'pca':
        if not isinstance(dictionary, PCA):
            raise ValueError('Must pass PCA object for PCA algorithm.')
        pca = dictionary
        dictionary = pca.components_
        X = shape[np.newaxis, :] - pca.mean_
        coefs = np.dot(X, dictionary[:n_components].T)
        recons = np.dot(coefs, dictionary[:n_components])[0] + pca.mean_
    error = np.sum((shape - recons) ** 2)

    # Prepare plotting
    if figsize is None:
        figsize = ((5 * n_components), 8)
    fig = plt.figure(figsize=figsize)
    markers = {'recons': '-o' if show_points else '-',
               'shapes': '--x' if show_points else '--'}
    xlim = 1.1 * shape[:xy].min(), 1.1 * shape[:xy].max()
    ylim = 1.1 * shape[xy:].min(), 1.1 * shape[xy:].max()

    # # Plot the reconstruction along the initial shape
    # plt.subplot(2, n_components+1, 1)
    # plt.plot(shape[:xy], shape[xy:], markers['shapes'], c='C0', lw=1.0)
    # plt.plot(recons[:xy], recons[xy:], markers['recons'], c='C1', lw=1.5)
    # plt.xlim(xlim); plt.ylim(ylim);
    # plt.tick_params(axis='both', bottom=False, labelbottom=False,
    #                 left=False, labelleft=False,)
    # plt.title('Error = {:.4f}'.format(error), fontsize=18)

    # Plot the components sorted by coefficient values
    # as well as the cumulative sum
    argsort = np.argsort(-np.abs(coefs[0])) if sorted else np.arange(n_components)
    assert len(np.where(coefs != 0)[0]) == n_components
    cumsum = np.zeros_like(shape)
    if algorithm == 'pca':
        cumsum += pca.mean_
    for i in range(n_components):
        coef = coefs[0][argsort][i]
        comp = dictionary[argsort][i]
        prevsum = cumsum
        cumsum = cumsum + coef * comp
        error = np.sum((shape - cumsum) ** 2)
        if scaled:
            comp = coef * comp

        # if algorithm == 'pca':
        #     comp += pca.mean_
        # plt.subplot(2, n_components+1, 2+i)
        # plt.plot(comp[:xy], comp[xy:], markers['recons'],
        #          c='C{}'.format((i+2)%10), lw=1.5)
        # plt.xlim(xlim); plt.ylim(ylim)
        # plt.tick_params(axis='both', bottom=False, labelbottom=False,
        #                 left=False, labelleft=False,)
        # plt.title('{:.4f}'.format(coef), fontsize=18)
        plt.subplot(2, n_components, 1 + i)
        plt.plot(comp[:xy], comp[xy:], markers['recons'],
                 c='C{}'.format((i + 2) % 10), lw=1.5)
        # loop
        plt.plot(np.array([comp[:xy][0], comp[:xy][-1]]),
                 np.array([comp[xy:][0], comp[xy:][-1]]),
                 markers['recons'], c='C{}'.format((i + 2) % 10), lw=1.5)
        plt.xlim(xlim);
        plt.ylim(ylim)
        plt.tick_params(axis='both', bottom=False, labelbottom=False,
                        left=False, labelleft=False, )
        plt.title('Coefficient = {:.2f}'.format(coef), fontsize=36)

        # plt.subplot(2, n_components+1, (n_components+3)+i)
        # plt.plot(prevsum[:xy], prevsum[xy:], markers['shapes'], lw=1.0)
        # plt.plot(cumsum[:xy], cumsum[xy:], markers['recons'], lw=1.5)
        # plt.xlim(xlim); plt.ylim(ylim)
        # plt.tick_params(axis='both', bottom=False, labelbottom=False,
        #                 left=False, labelleft=False,)
        plt.subplot(2, n_components, n_components + 1 + i)
        plt.plot(shape[:xy], shape[xy:], markers['shapes'], c='C0', lw=1.0)
        # loop
        plt.plot(np.array([shape[:xy][0], shape[:xy][-1]]),
                 np.array([shape[xy:][0], shape[xy:][-1]]),
                 markers['shapes'], c='C0', lw=1.0)
        plt.plot(cumsum[:xy], cumsum[xy:], markers['recons'], c='C1', lw=1.5)
        plt.plot(np.array([cumsum[:xy][0], cumsum[:xy][-1]]),
                 np.array([cumsum[xy:][0], cumsum[xy:][-1]]),
                 markers['recons'], c='C1', lw=1.5)
        plt.xlim(xlim);
        plt.ylim(ylim)
        plt.tick_params(axis='both', bottom=False, labelbottom=False,
                        left=False, labelleft=False, )
        plt.title('Error = {:.2f}'.format(error), fontsize=36)

    return fig
Example #32
def active_support_elastic_net(X,
                               y,
                               alpha,
                               tau=1.0,
                               algorithm='spams',
                               support_init='knn',
                               support_size=100,
                               maxiter=40):
    """An active support based algorithm for solving the elastic net optimization problem
        min_{c} tau ||c||_1 + (1-tau)/2 ||c||_2^2 + alpha / 2 ||y - c X ||_2^2.
		
    Parameters
    -----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (1, n_features)
    alpha : float
    tau : float, default 1.0
    algorithm : string, default ``spams``
        Algorithm for solving the subproblems. Either lasso_lars or lasso_cd or spams
        (installation of spams package is required).
        Note: ``lasso_lars`` and ``lasso_cd`` only support tau = 1.
    support_init: string, default ``knn``
        This determines how the active support is initialized.
        It can be either ``knn`` or ``L2``.
    support_size: int, default 100
        This determines the size of the working set.
        A small support_size decreases the runtime per iteration while increasing the number of iterations.
    maxiter: int, default 40
        Termination condition for active support update.
		
    Returns
    -------
    c : ndarray, shape (n_samples,)
        The optimal solution to the optimization problem.
    """
    n_samples = X.shape[0]

    if n_samples <= support_size:  # skip active support search for small scale data
        supp = np.arange(
            n_samples, dtype=int
        )  # with the full support, the loop below converges in a single iteration
    else:
        if support_init == 'L2':
            L2sol = np.linalg.solve(
                np.identity(y.shape[1]) * alpha + np.dot(X.T, X), y.T)
            c0 = np.dot(X, L2sol)[:, 0]
            supp = np.argpartition(-np.abs(c0), support_size)[0:support_size]
        elif support_init == 'knn':
            supp = np.argpartition(-np.abs(np.dot(y, X.T)[0]),
                                   support_size)[0:support_size]

    curr_obj = float("inf")
    for _ in range(maxiter):
        Xs = X[supp, :]
        if algorithm == 'spams':
            cs = spams.lasso(np.asfortranarray(y.T),
                             D=np.asfortranarray(Xs.T),
                             lambda1=tau * alpha,
                             lambda2=(1.0 - tau) * alpha)
            cs = np.asarray(cs.todense()).T
        else:
            cs = sparse_encode(y, Xs, algorithm=algorithm, alpha=alpha)

        delta = (y - np.dot(cs, Xs)) / alpha

        obj = tau * np.sum(np.abs(cs[0])) + (1.0 - tau) / 2.0 * np.sum(
            np.power(cs[0], 2.0)) + alpha / 2.0 * np.sum(np.power(delta, 2.0))
        if curr_obj - obj < 1.0e-10 * curr_obj:
            break
        curr_obj = obj

        coherence = np.abs(np.dot(delta, X.T))[0]
        coherence[supp] = 0
        addedsupp = np.nonzero(coherence > tau + 1.0e-10)[0]

        if addedsupp.size == 0:  # converged
            break

        # Find the set of nonzero entries of cs.
        activesupp = supp[np.abs(cs[0]) > 1.0e-10]

        if activesupp.size > 0.8 * support_size:  # this suggests that support_size is too small and needs to be increased
            support_size = min(
                [round(max([activesupp.size, support_size]) * 1.1), n_samples])

        if addedsupp.size + activesupp.size > support_size:
            ord = np.argpartition(-coherence[addedsupp], support_size -
                                  activesupp.size)[0:support_size -
                                                   activesupp.size]
            addedsupp = addedsupp[ord]

        supp = np.concatenate([activesupp, addedsupp])

    c = np.zeros(n_samples)
    c[supp] = cs
    return c
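
To make the objective in the docstring above concrete, here is a small sketch that is not part of the original code (the helper name elastic_net_objective and the toy data are invented for illustration); it simply evaluates tau * ||c||_1 + (1-tau)/2 * ||c||_2^2 + alpha/2 * ||y - c X||_2^2 for two candidate coefficient vectors:

import numpy as np


def elastic_net_objective(c, X, y, alpha, tau=1.0):
    """Objective from the docstring: tau*||c||_1 + (1-tau)/2*||c||_2^2 + alpha/2*||y - c X||_2^2."""
    fit = y - c @ X  # c has shape (n_samples,), X has shape (n_samples, n_features)
    return (tau * np.abs(c).sum()
            + 0.5 * (1.0 - tau) * np.dot(c, c)
            + 0.5 * alpha * np.sum(fit ** 2))


rng = np.random.RandomState(0)
X = rng.randn(80, 40)                          # candidate atoms, one per row
y = (0.7 * X[3] - 0.2 * X[17]).reshape(1, -1)  # a point built from two rows

c_exact = np.zeros(80)
c_exact[3], c_exact[17] = 0.7, -0.2
print(elastic_net_objective(c_exact, X, y, alpha=10.0))       # low: exact fit, small L1 cost
print(elastic_net_objective(np.zeros(80), X, y, alpha=10.0))  # higher: pays the full data-fit term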
Example #33
#         coefficient = sparse_encode(result,
#                                     self.dictionary,
#                                     algorithm="omp",
#                                     n_nonzero_coefs=None,
#                                     alpha=None)
#         return coefficient


with open("basisShapesC64L0.1", "rb") as file:
    dictionary = sio.loadmat(file)['component']

with open("sorted_shapes-32.mat", "rb") as file:
    shapes = sio.loadmat(file)['shapes']
    targets = sio.loadmat(file)['target']
targets = targets.reshape((1700, 1))
print(targets.shape)
coefficients = sparse_encode(shapes,
                             dictionary,
                             algorithm="omp",
                             n_nonzero_coefs=None,
                             alpha=None)
a = {"coefficients": coefficients, "targets": targets}
sio.savemat("coefficients.mat", a)

# for i in np.count_nonzero(coefficients, 1):
#     print(i)
#
# recons = np.dot(coefficients, dictionary)
# errors = np.sum((shapes - recons) ** 2, axis=1)
# print(sum(errors) / len(errors))
Example #34
def encode(X, dictionary):
    """
    Sparse coding
    """
    return decomp.sparse_encode(X, dictionary)
Example #35
import numpy as np
from sklearn.decomposition import sparse_encode
HS = sparse_encode(np.random.randn(108, 1600),
                   np.random.randn(108, 5000),
                   alpha=1. / 5000.,
                   algorithm='lasso_lars').T
    trajectory['x'] = []
    trajectory['y'] = []

plt.show()


alpha_schedule = [ .2/5000., .5/5000., 1./5000., 2./5000., 5./5000. ]


assert num_trajectories == len(trajectories)

for j, alpha in enumerate(alpha_schedule):
    print('j = ', j, '; alpha = ', alpha)
    from sklearn.decomposition import sparse_encode
    print('running SC ', j)
    HS = sparse_encode( model.W.get_value(), X.T, alpha = alpha, algorithm='lasso_cd').T
    assert HS.shape == (5000,1600)
    print('done encoding')

    HS = np.abs(HS)


    if np.any(np.isnan(HS)):
        print('has nans')

    if np.any(np.isinf(HS)):
        print('has infs')

    print('HS shape ', HS.shape)
    print('HS subtensor shape ', HS[0:num_trajectories].shape)
    act_prob = (HS[:,0:num_trajectories] > .01).mean(axis=0)
Example #37
print(image.shape)
abundance = abundance_map((.5, .33333, .25, .2), 1, (75, 75))
data = np.reshape(image, (image.shape[0] * image.shape[1], image.shape[2]))
print(data.shape)
dictionary, keys = convert_library(library)
print(dictionary.shape)
imputer_data = Imputer()
imputer_dict = Imputer()
imputer_data.fit(data)
imputer_dict.fit(dictionary)
data = imputer_data.transform(data)
dictionary = imputer_dict.transform(dictionary)

sparse = sparse_encode(data,
                       dictionary,
                       algorithm='lasso_cd',
                       max_iter=1000,
                       n_nonzero_coefs=20,
                       alpha=2)

print(sparse.shape)

#output = np.reshape(sparse, (75,75,224))
numbers = []
for n in names:
    iter = 0
    for keys in sorted(library.keys()):
        iter += 1
        if n == keys:
            numbers.append(iter)
print(numbers)
used = np.zeros((data.shape[0], 0))
Example #38
 if 'SC' in args['dimReductionType']:
     ####################################
     #          Sparse Coding           #
     ####################################
     print('Sparse Coding:')
     # normalize every column respectively
     from sklearn.preprocessing import MinMaxScaler
     normalizer = MinMaxScaler() # feature range (0,1)
     dataArray_normalized = normalizer.fit_transform(dataArray)
     print('normalized data:')
     print(dataArray_normalized)
     # reduce to the specified dimension
     from learnDic import sparse_coding
     from sklearn.decomposition import sparse_encode   
     dl = sparse_coding(reducedDimension, dataArray_normalized, 0.2, 1000, 0.0001)
     code = sparse_encode(dataArray_normalized, dl.components_)
     data_reduced = code
     print('Reduced data:')
     print(data_reduced)
     print('Dictionary:')
     print(dl.components_)
     print('iteration:', dl.n_iter_)
 elif 'PCA' in args['dimReductionType']:
     ####################################
     #   Principal Component Analysis   #
     ####################################
     from matplotlib.mlab import PCA as mlabPCA
     print('PCA:')
     myPCA = mlabPCA(dataArray)
     data_reduced = myPCA.Y[:,0:reducedDimension]# reduce to the specified dimension
     print('Raw data:')
Example #39
def elastic_net_subspace_clustering(X,
                                    gamma=50.0,
                                    gamma_nz=True,
                                    tau=1.0,
                                    algorithm='lasso_lars',
                                    active_support=True,
                                    active_support_params=None,
                                    n_nonzero=50):
    if algorithm in ('lasso_lars', 'lasso_cd') and tau < 1.0 - 1.0e-10:
        warnings.warn(
            'algorithm {} cannot handle tau smaller than 1. Using tau = 1'.
            format(algorithm))
        tau = 1.0

    if active_support == True and active_support_params == None:
        active_support_params = {}

    n_samples = X.shape[0]
    rows = np.zeros(n_samples * n_nonzero)
    cols = np.zeros(n_samples * n_nonzero)
    vals = np.zeros(n_samples * n_nonzero)
    curr_pos = 0

    for i in progressbar.progressbar(range(n_samples)):
        y = X[i, :].copy().reshape(1, -1)
        X[i, :] = 0

        if algorithm in ('lasso_lars', 'lasso_cd', 'spams'):
            if gamma_nz == True:
                coh = np.delete(np.absolute(np.dot(X, y.T)), i)
                alpha0 = np.amax(
                    coh) / tau  # value for which the solution is zero
                alpha = alpha0 / gamma
            else:
                alpha = 1.0 / gamma

            if active_support == True:
                c = active_support_elastic_net(X, y, alpha, tau, algorithm,
                                               **active_support_params)
            else:
                if algorithm == 'spams':
                    c = spams.lasso(np.asfortranarray(y.T),
                                    D=np.asfortranarray(X.T),
                                    lambda1=tau * alpha,
                                    lambda2=(1.0 - tau) * alpha)
                    c = np.asarray(c.todense()).T[0]
                else:
                    c = sparse_encode(y, X, algorithm=algorithm,
                                      alpha=alpha)[0]
        else:
            warnings.warn("algorithm {} not found".format(algorithm))

        index = np.flatnonzero(c)
        if index.size > n_nonzero:
            #  warnings.warn("The number of nonzero entries in sparse subspace clustering exceeds n_nonzero")
            index = index[np.argsort(-np.absolute(c[index]))[0:n_nonzero]]
        rows[curr_pos:curr_pos + len(index)] = i
        cols[curr_pos:curr_pos + len(index)] = index
        vals[curr_pos:curr_pos + len(index)] = c[index]
        curr_pos += len(index)

        X[i, :] = y


#   affinity = sparse.csr_matrix((vals, (rows, cols)), shape=(n_samples, n_samples)) + sparse.csr_matrix((vals, (cols, rows)), shape=(n_samples, n_samples))
    return sparse.csr_matrix((vals, (rows, cols)),
                             shape=(n_samples, n_samples))
Example #40
print("X_train.shape", train_X.shape)
print("Components shape", dl.components_.shape)

# components = dl.components().reshape((n_components, n_features))
components = dl.components_

# Visualizing the components as images
component_titles = ["component %d" % i for i in range(components.shape[0])]
plot_gallery("Visualizing top components", components, component_titles, patch_w, patch_h, n_row=24, n_col=24)
plt.show()

###############################################################################
# Sparse Encoding
print("\nSparse Encoding")
train_X_pca = np.zeros((len(train_X_patches), n_components))
train_X_pca = sparse_encode(train_X_patches, components, algorithm='omp')
np.set_printoptions(precision=3, suppress=True)
print(train_X_pca)
# for i in range(len(train_X)):
#     train_X_pca[i] = dl.transform(train_X[i])

test_X_pca = np.zeros((len(test_X), n_components))
test_X_pca = sparse_encode(test_X_patches, components, algorithm='omp')
# for i in range(len(test_X)):
#     test_X_pca[i] = dl.transform(test_X[i])

print("train_X_pca.shape", train_X_pca.shape)

###############################################################################
# Visualize reconstructed images
reconstructed_X = np.zeros((20, n_features))
Example #41
def elastic_net_subspace_clustering(X,
                                    gamma=50.0,
                                    gamma_nz=True,
                                    tau=1.0,
                                    algorithm='lasso_lars',
                                    active_support=True,
                                    active_support_params=None,
                                    n_nonzero=50):
    """Elastic net subspace clustering (EnSC) [1]. 
    Compute self-representation matrix C from solving the following optimization problem
    min_{c_j} tau ||c_j||_1 + (1-tau)/2 ||c_j||_2^2 + alpha / 2 ||x_j - c_j X ||_2^2 s.t. c_jj = 0,
    where c_j and x_j are the j-th rows of C and X, respectively.
	
    Parameter ``algorithm`` specifies the algorithm for solving the optimization problem.
    ``lasso_lars`` and ``lasso_cd`` are algorithms implemented in sklearn;
    ``spams`` refers to the same algorithm as ``lasso_lars`` but is implemented in the
    spams package available at http://spams-devel.gforge.inria.fr/ (installation required).
    In principle, all three algorithms give the same result.
    For large scale data (e.g. with > 5000 data points), use any of these algorithms in
    conjunction with ``active_support=True``. It adopts an efficient active support
    strategy that solves the optimization problem by breaking it into a sequence of
    small scale optimization problems as described in [1].
    If tau = 1.0, the method reduces to sparse subspace clustering with basis pursuit (SSC-BP) [2].
    If tau = 0.0, the method reduces to least squares regression (LSR) [3].
    Note: ``lasso_lars`` and ``lasso_cd`` only support tau = 1.
    Parameters
    -----------
    X : array-like, shape (n_samples, n_features)
        Input data to be clustered
    gamma : float
    gamma_nz : boolean, default True
        gamma and gamma_nz together determine the parameter alpha. When ``gamma_nz = False``,
        alpha = gamma. When ``gamma_nz = True``, alpha = gamma * alpha0, where alpha0 is
        the largest number such that the solution to the optimization problem with alpha = alpha0
        is the zero vector (see Proposition 1 in [1]). Therefore, when ``gamma_nz = True``, gamma
        should be a value greater than 1.0. A good choice is typically in the range [5, 500].
    tau : float, default 1.0
        Parameter for elastic net penalty term. 
        When tau = 1.0, the method reduces to sparse subspace clustering with basis pursuit (SSC-BP) [2].
        When tau = 0.0, the method reduces to least squares regression (LSR) [3].
    algorithm : string, default ``lasso_lars``
        Algorithm for computing the representation. Either lasso_lars or lasso_cd or spams 
        (installation of spams package is required).
        Note: ``lasso_lars`` and ``lasso_cd`` only support tau = 1.
    n_nonzero : int, default 50
        This is an upper bound on the number of nonzero entries of each representation vector. 
        If there are more than n_nonzero nonzero entries, only the n_nonzero entries
        with the largest absolute value are kept.
    active_support: boolean, default True
        Set to True to use the active support algorithm in [1] for solving the optimization problem.
        This should significantly reduce the running time when n_samples is large.
    active_support_params: dictionary of string to any, optional
        Parameters (keyword arguments) and values for the active support algorithm. It may be
        used to set the parameters ``support_init``, ``support_size`` and ``maxiter``, see
        ``active_support_elastic_net`` for details. 
        Example: active_support_params={'support_size':50, 'maxiter':100}
        Ignored when ``active_support=False``
	
    Returns
    -------
    representation_matrix_ : csr matrix, shape (n_samples, n_samples)
        The self-representation matrix.

    References
    ----------
    [1] C. You, C.-G. Li, D. Robinson, R. Vidal, Oracle Based Active Set Algorithm for Scalable Elastic Net Subspace Clustering, CVPR 2016
    [2] E. Elhamifar, R. Vidal, Sparse Subspace Clustering: Algorithm, Theory, and Applications, TPAMI 2013
    [3] C. Lu, et al., Robust and efficient subspace segmentation via least squares regression, ECCV 2012
    """
    if algorithm in ('lasso_lars', 'lasso_cd') and tau < 1.0 - 1.0e-10:
        warnings.warn(
            'algorithm {} cannot handle tau smaller than 1. Using tau = 1'.
            format(algorithm))
        tau = 1.0

    if active_support == True and active_support_params == None:
        active_support_params = {}

    n_samples = X.shape[0]
    rows = np.zeros(n_samples * n_nonzero)
    cols = np.zeros(n_samples * n_nonzero)
    vals = np.zeros(n_samples * n_nonzero)
    curr_pos = 0

    #     for i in progressbar.progressbar(range(n_samples)):
    for i in range(n_samples):
        #    if i % 1000 == 999:
        #        print('SSC: sparse coding finished {i} in {n_samples}'.format(i=i, n_samples=n_samples))
        y = X[i, :].copy().reshape(1, -1)
        X[i, :] = 0

        if algorithm in ('lasso_lars', 'lasso_cd', 'spams'):
            if gamma_nz == True:
                coh = np.delete(np.absolute(np.dot(X, y.T)), i)
                alpha0 = np.amax(
                    coh) / tau  # value for which the solution is zero
                alpha = alpha0 / gamma
            else:
                alpha = 1.0 / gamma

            if active_support == True:
                c = active_support_elastic_net(X, y, alpha, tau, algorithm,
                                               **active_support_params)
            else:
                if algorithm == 'spams':
                    c = spams.lasso(np.asfortranarray(y.T),
                                    D=np.asfortranarray(X.T),
                                    lambda1=tau * alpha,
                                    lambda2=(1.0 - tau) * alpha)
                    c = np.asarray(c.todense()).T[0]
                else:
                    c = sparse_encode(y, X, algorithm=algorithm,
                                      alpha=alpha)[0]
        else:
            warnings.warn("algorithm {} not found".format(algorithm))

        index = np.flatnonzero(c)
        if index.size > n_nonzero:
            #  warnings.warn("The number of nonzero entries in sparse subspace clustering exceeds n_nonzero")
            index = index[np.argsort(-np.absolute(c[index]))[0:n_nonzero]]
        rows[curr_pos:curr_pos + len(index)] = i
        cols[curr_pos:curr_pos + len(index)] = index
        vals[curr_pos:curr_pos + len(index)] = c[index]
        curr_pos += len(index)

        X[i, :] = y


#   affinity = sparse.csr_matrix((vals, (rows, cols)), shape=(n_samples, n_samples)) + sparse.csr_matrix((vals, (cols, rows)), shape=(n_samples, n_samples))
    return sparse.csr_matrix((vals, (rows, cols)),
                             shape=(n_samples, n_samples))
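
A hedged usage sketch for the function above: it assumes elastic_net_subspace_clustering is in scope together with NumPy and scikit-learn, uses active_support=False to keep the toy problem simple, and feeds the symmetrized representation matrix to spectral clustering as is typical in subspace clustering pipelines. The data and parameter values are illustrative only.

import numpy as np
from sklearn.cluster import SpectralClustering

rng = np.random.RandomState(0)
# 60 points drawn from each of two 3-dimensional subspaces of R^20.
basis1, basis2 = rng.randn(3, 20), rng.randn(3, 20)
X = np.vstack([rng.randn(60, 3) @ basis1, rng.randn(60, 3) @ basis2])

C = elastic_net_subspace_clustering(X, gamma=50.0, tau=1.0,
                                    algorithm='lasso_lars', active_support=False)

# Symmetrize |C| to get an affinity matrix and segment it with spectral clustering;
# points from the two subspaces should mostly end up in two different groups.
affinity = 0.5 * (abs(C) + abs(C).T)
labels = SpectralClustering(n_clusters=2, affinity='precomputed',
                            random_state=0).fit_predict(affinity.toarray())
print(labels)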
def elastic_net_subspace_clustering(X,
                                    gamma=50.0,
                                    gamma_nz=True,
                                    tau=1.0,
                                    algorithm='lasso_lars',
                                    active_support=True,
                                    active_support_params=None,
                                    n_nonzero=50):
    """Elastic net subspace clustering (EnSC) [1]. 
    	
    References
    ----------
    [1] C. You, C.-G. Li, D. Robinson, R. Vidal, Oracle Based Active Set Algorithm for Scalable Elastic Net Subspace Clustering, CVPR 2016
    [2] E. Elhamifar, R. Vidal, Sparse Subspace Clustering: Algorithm, Theory, and Applications, TPAMI 2013
    [3] C. Lu, et al., Robust and efficient subspace segmentation via least squares regression, ECCV 2012
    """
    if algorithm in ('lasso_lars', 'lasso_cd') and tau < 1.0 - 1.0e-10:
        warnings.warn(
            f'algorithm {algorithm} cannot handle tau smaller than 1. Using tau = 1'
        )
        tau = 1.0

    if active_support == True and active_support_params == None:
        active_support_params = {}

    n_samples = X.shape[0]
    rows = np.zeros(n_samples * n_nonzero)
    cols = np.zeros(n_samples * n_nonzero)
    vals = np.zeros(n_samples * n_nonzero)
    curr_pos = 0

    for i in range(n_samples):
        y = X[i, :].copy().reshape(1, -1)
        X[i, :] = 0

        if algorithm in ('lasso_lars', 'lasso_cd', 'spams'):
            if gamma_nz == True:
                coh = np.delete(np.absolute(np.dot(X, y.T)), i)
                alpha0 = np.amax(
                    coh) / tau  # value for which the solution is zero
                alpha = alpha0 / gamma
            else:
                alpha = 1.0 / gamma

            if active_support == True:
                c = active_support_elastic_net(X, y, alpha, tau, algorithm,
                                               **active_support_params)
            else:
                if algorithm == 'spams':
                    c = spams.lasso(np.asfortranarray(y.T),
                                    D=np.asfortranarray(X.T),
                                    lambda1=tau * alpha,
                                    lambda2=(1.0 - tau) * alpha)
                    c = np.asarray(c.todense()).T[0]
                else:
                    c = sparse_encode(y, X, algorithm=algorithm,
                                      alpha=alpha)[0]
        else:
            warnings.warn("algorithm {} not found".format(algorithm))

        index = np.flatnonzero(c)
        if index.size > n_nonzero:
            index = index[np.argsort(-np.absolute(c[index]))[0:n_nonzero]]
        rows[curr_pos:curr_pos + len(index)] = i
        cols[curr_pos:curr_pos + len(index)] = index
        vals[curr_pos:curr_pos + len(index)] = c[index]
        curr_pos += len(index)

        X[i, :] = y

    return sparse.csr_matrix((vals, (rows, cols)),
                             shape=(n_samples, n_samples))
Example #43
def test_unknown_method():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    with pytest.raises(ValueError):
        sparse_encode(X, V, algorithm="<unknown>")
Example #44
def active_support_elastic_net(X,
                               y,
                               alpha,
                               tau=1.0,
                               algorithm='spams',
                               support_init='knn',
                               support_size=100,
                               maxiter=40):
    n_samples = X.shape[0]

    if n_samples <= support_size:  # skip active support search for small scale data
        supp = np.arange(
            n_samples, dtype=int
        )  # with the full support, the loop below converges in a single iteration
    else:
        if support_init == 'L2':
            L2sol = np.linalg.solve(
                np.identity(y.shape[1]) * alpha + np.dot(X.T, X), y.T)
            c0 = np.dot(X, L2sol)[:, 0]
            supp = np.argpartition(-np.abs(c0), support_size)[0:support_size]
        elif support_init == 'knn':
            supp = np.argpartition(-np.abs(np.dot(y, X.T)[0]),
                                   support_size)[0:support_size]

    curr_obj = float("inf")
    for _ in range(maxiter):
        Xs = X[supp, :]
        if algorithm == 'spams':
            cs = spams.lasso(np.asfortranarray(y.T),
                             D=np.asfortranarray(Xs.T),
                             lambda1=tau * alpha,
                             lambda2=(1.0 - tau) * alpha)
            cs = np.asarray(cs.todense()).T
        else:
            cs = sparse_encode(y, Xs, algorithm=algorithm, alpha=alpha)

        delta = (y - np.dot(cs, Xs)) / alpha

        obj = tau * np.sum(np.abs(cs[0])) + (1.0 - tau) / 2.0 * np.sum(
            np.power(cs[0], 2.0)) + alpha / 2.0 * np.sum(np.power(delta, 2.0))
        if curr_obj - obj < 1.0e-10 * curr_obj:
            break
        curr_obj = obj

        coherence = np.abs(np.dot(delta, X.T))[0]
        coherence[supp] = 0
        addedsupp = np.nonzero(coherence > tau + 1.0e-10)[0]

        if addedsupp.size == 0:  # converged
            break

        # Find the set of nonzero entries of cs.
        activesupp = supp[np.abs(cs[0]) > 1.0e-10]

        if activesupp.size > 0.8 * support_size:  # this suggests that support_size is too small and needs to be increased
            support_size = min(
                [round(max([activesupp.size, support_size]) * 1.1), n_samples])

        if addedsupp.size + activesupp.size > support_size:
            ord = np.argpartition(-coherence[addedsupp], support_size -
                                  activesupp.size)[0:support_size -
                                                   activesupp.size]
            addedsupp = addedsupp[ord]

        supp = np.concatenate([activesupp, addedsupp])

    c = np.zeros(n_samples)
    c[supp] = cs
    return c