def dictionary_learning_MHOF_online(training_samples_num=400):
    from MHOF_Extraction import MHOF_Extraction
    from MHOF_histogram_block import MHOF_histogram_block
    from sklearn.decomposition import MiniBatchDictionaryLearning
    import numpy as np
    import cv2
    import video
    cam=video.create_capture('Crowd-Activity-All.avi')
    height_block_num=4
    width_block_num=5
    bin_num=16
    ret,prev=cam.read()
    ret,img=cam.read()
    flow_H=MHOF_Extraction(prev,img)
    flow_hist_H=MHOF_histogram_block(flow_H,height_block_num,width_block_num,bin_num)
    flow_hist_H=np.reshape(flow_hist_H,[1,flow_hist_H.size])
    # fit expects a 2D array, hence the reshape above; n_components is the first positional argument
    dico=MiniBatchDictionaryLearning(n_components=1,alpha=1,n_iter=500)
    dic=dico.fit(flow_hist_H).components_
    for i in range(training_samples_num):
        ret,img=cam.read()
        flow_H=MHOF_Extraction(prev,img)
        flow_hist_H=MHOF_histogram_block(flow_H,height_block_num,width_block_num,bin_num)
        flow_hist_H=np.reshape(flow_hist_H,[1,flow_hist_H.size])
        # warm-start from the previous dictionary, growing it by one atom per sample
        dico=MiniBatchDictionaryLearning(n_components=i+1,alpha=1,n_iter=500,dict_init=dic)
        dic=dico.fit(flow_hist_H).components_
        prev=img  # advance the frame pair so flow is computed between consecutive frames
    return dic
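
A note on the pattern above: re-creating the estimator with dict_init on every frame
works, but MiniBatchDictionaryLearning also exposes partial_fit for true online
updates. A minimal sketch of that alternative (the 320-dimensional histograms and the
atom count are illustrative assumptions, not values from the code above):

import numpy as np
from sklearn.decomposition import MiniBatchDictionaryLearning

rng = np.random.RandomState(0)
dico = MiniBatchDictionaryLearning(n_components=32, alpha=1, random_state=0)
for _ in range(400):
    sample = rng.randn(1, 320)    # stand-in for one reshaped flow histogram
    dico.partial_fit(sample)      # updates the dictionary in place, one sample at a time
dic = dico.components_            # final (32, 320) dictionary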

        
Example #2
def sklearn_check(img, patch_size, dic_size, T=1000):
    patch_shape = (patch_size, patch_size)
    patches = extract_patches_2d(img, patch_shape)
    patches = patches.reshape(patches.shape[0], -1)
    patches = center(patches)
    dl = MiniBatchDictionaryLearning(dic_size, n_iter=T)
    dl.fit(patches)
    D = dl.components_.T
    return D
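
sklearn_check above relies on a center helper that this snippet does not define; a
minimal sketch, assuming it simply removes the per-feature mean of the flattened
patches:

import numpy as np

def center(patches):
    # hypothetical helper: zero-center each patch dimension before dictionary learning
    return patches - np.mean(patches, axis=0)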
def to_sparse(X,dim):

	sparse_dict = MiniBatchDictionaryLearning(dim)
	sparse_dict.fit(X)
	sparse_vectors = sparse_encode(X, sparse_dict.components_)

	for i in sparse_vectors:
		print(i)

	return sparse_vectors
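
A quick usage sketch for to_sparse, assuming MiniBatchDictionaryLearning and
sparse_encode are imported from sklearn.decomposition as in the snippets above:

import numpy as np

X = np.random.RandomState(0).randn(200, 30)
codes = to_sparse(X, dim=10)   # one code row per sample, one column per atom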
Example #4
class BOW_sparsecoding(BOW):

	def codebook(self):
		self.mbdl =  MiniBatchDictionaryLearning(self.N_codebook)
		self.mbdl.fit(self.raw_features)
		

	def bow_feature_extract(self, path):
		des = self.raw_feature_extract(path)
		out = sum(sparse_encode(des, self.mbdl.components_))
		out = np.array([out])
		return out
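
bow_feature_extract pools the per-descriptor sparse codes into a single vector by
summation. Sum pooling is only one choice; below is a sketch of max pooling, a common
alternative in sparse-coding bag-of-words pipelines (the function name and the mbdl
argument are assumptions mirroring the class above):

import numpy as np
from sklearn.decomposition import sparse_encode

def bow_feature_extract_maxpool(mbdl, des):
    # pool by the per-atom maximum absolute activation instead of the sum
    codes = sparse_encode(des, mbdl.components_)
    return np.abs(codes).max(axis=0)[np.newaxis, :]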
Example #5
def test_dict_learning_online_verbosity():
    n_components = 5
    # test verbosity
    from cStringIO import StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1)
    dico.fit(X)
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2)
    dico.fit(X)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=1)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=2)
    sys.stdout = old_stdout
    assert_true(dico.components_.shape == (n_components, n_features))
Example #6
def buildmodel2():
    "生成有眼镜-无眼镜pair模型"
    modelrec = np.load('cut_rec.npy')
    modelglass = np.load('glassline.npy')[:modelrec.shape[0]]

    linkedmodel = np.empty((modelrec.shape[0],modelrec.shape[1]+modelglass.shape[1]),'f')
    linkedmodel[:,:modelrec.shape[1]]=modelrec
    linkedmodel[:,modelrec.shape[1]:]=modelglass

    #Train
    from sklearn.decomposition import MiniBatchDictionaryLearning
    learning = MiniBatchDictionaryLearning(500,verbose=True)
    learning.fit(linkedmodel)
    import cPickle
    cPickle.dump(learning,open('sparselinked','wb'),-1)
Example #7
def main(games_path = None):
    
    if games_path is None:
        games_path = 'specmine/data/go_games/2010-01.pickle.gz'

    with specmine.util.openz(games_path) as games_file:
        games = pickle.load(games_file)

    boards = None # numpy array nx9x9 
    for game in games:
        if boards is None:
            boards = games[game].grids
        else:
            boards = numpy.vstack((boards,games[game].grids))

    print('boards shape:', boards.shape)

    boards = boards.reshape((boards.shape[0],-1))

    print('boards reshaped:', boards.shape)

    print('Learning the dictionary... ')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=500)
    V = dico.fit(boards).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    #pl.figure(figsize=(4.2, 4))
    for i, comp in enumerate(V[:100]):
        pl.subplot(10, 10, i + 1)
        pl.imshow(comp.reshape((9, 9)), cmap=pl.cm.gray_r) # atoms are flattened 9x9 boards
        pl.xticks(())
        pl.yticks(())
Example #8
File: sparsity.py  Project: rousseau/fbrain
def scskl_dico_learning(list_pickled_array,n_atoms,maxepoch=5,maxiter=100):
  D = None
  for e in range(maxepoch):
    for a in list_pickled_array:
      data = joblib.load(a)
      dico = MiniBatchDictionaryLearning(n_components=n_atoms, n_iter=maxiter, dict_init=D)
      D = dico.fit(data).components_.astype(np.float32)
  return D      
Example #9
def test_dict_learning_online_verbosity():
    n_components = 5
    # test verbosity
    from sklearn.externals.six.moves import cStringIO as StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1,
                                       random_state=0)
    dico.fit(X)
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2,
                                       random_state=0)
    dico.fit(X)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=1,
                         random_state=0)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=2,
                         random_state=0)
    sys.stdout = old_stdout
    assert_true(dico.components_.shape == (n_components, n_features))
Example #10
def create_dictionaries(n_codewords=20):
	dataset_features = np.load('MSR_Features_hog-hof-skel1360423760.27.dat')
	hogs = []
	hofs = []
	skels = []
	for n in dataset_features.keys():
		hogs +=	dataset_features[n]['hog']
		hofs +=	dataset_features[n]['hof']
		skels += [normalize_skeleton(dataset_features[n]['skel_world'])]

	''' Input should be features[n_samples, n_features] '''
	hogs = np.vstack(hogs)
	hofs = np.vstack(hofs)
	skels = np.vstack(skels)

	hog_dict = MiniBatchDictionaryLearning(n_codewords, n_jobs=-1, verbose=True, transform_algorithm='lasso_lars')
	hog_dict.fit(hogs)
	hof_dict = MiniBatchDictionaryLearning(n_codewords, n_jobs=-1, verbose=True, transform_algorithm='lasso_lars')
	hof_dict.fit(hofs)
	skels_dict = MiniBatchDictionaryLearning(n_codewords, n_jobs=-1, verbose=True, transform_algorithm='lasso_lars')
	skels_dict.fit(skels)

	feature_dictionaries = {'hog':hog_dict, 'hof':hof_dict, 'skel':skels_dict}

	with open('MSR_Dictionaries_hog-hof-skel_%f.dat'%time.time(), 'wb') as outfile:
	    pickle.dump(feature_dictionaries, outfile, protocol=pickle.HIGHEST_PROTOCOL)
Example #11
def test_dict_learning_online_verbosity():
    n_components = 5
    # test verbosity
    from io import StringIO
    import sys

    old_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1,
                                           random_state=0)
        dico.fit(X)
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2,
                                           random_state=0)
        dico.fit(X)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=1,
                             random_state=0)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=2,
                             random_state=0)
    finally:
        sys.stdout = old_stdout

    assert dico.components_.shape == (n_components, n_features)
def learning_sparse_coding(X, components=None):
    """
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.DictionaryLearning.html
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.sparse_encode.html
    """
    if components is None:
        print('Learning the dictionary...')
        t0 = time()
        diclearner = MiniBatchDictionaryLearning(n_components=100, verbose=True)
        components = diclearner.fit(X).components_
        np.savetxt('components_of_convfeat.txt', components)
        dt = time() - t0
        print('done in %.2fs.' % dt)

    codes = sparse_encode(X, components)
    np.savetxt('sparse_codes_of_convfeat.txt', codes)
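
A minimal usage sketch for learning_sparse_coding on synthetic data (shapes are
illustrative; the function saves the learned components and codes to text files):

import numpy as np

X = np.random.RandomState(0).randn(500, 64)   # 500 samples, 64 features
learning_sparse_coding(X)                     # learns 100 atoms, then sparse-codes X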
Example #13
def train_sparse_coding(feature_list, patch_list, dict_size=256, transform_alpha=0.5, n_iter=50):
    """
    使用mini batch训练稀疏编码
    #feature_list 表示要训练的特征的列表
    #patch_list 表示结果patch的列表

    :return sc_list
    """
    sc_list = []
    i = 0
    for feature, patch in zip(feature_list, patch_list):
        i = i + 1
        '''
        Because of the relative scale of the combined values, sparse coding may ignore
        the smaller features; the x10 below should be replaced with a proper feature
        normalization method. The stronger the correlation, the more useful each
        vector is, so training needs more time.
        '''
        dico = None
        X = np.concatenate((feature, patch), axis=1)

        if len(X) > 100000:
            np.random.shuffle(X)
            X = X[:90000]

        if len(X) < 5000:
            print("Entering DictionaryLearning mode")
            dico = MiniBatchDictionaryLearning(batch_size=1000, transform_algorithm='lasso_lars', fit_algorithm='lars',
                                               transform_n_nonzero_coefs=5, n_components=len(X)//50,
                                               dict_init=X[:len(X)//50],
                                               n_iter=n_iter, transform_alpha=transform_alpha, verbose=10, n_jobs=-1)
        else:
            print("Entering MiniBatchDictionaryLearning mode")
            dico = MiniBatchDictionaryLearning(batch_size=1000, transform_algorithm='lasso_lars', fit_algorithm='lars',
                                               transform_n_nonzero_coefs=5, n_components=len(X)//50,
                                               dict_init=X[:len(X)//50],
                                               n_iter=n_iter, transform_alpha=transform_alpha, verbose=10, n_jobs=-1)
        V = dico.fit(X).components_
        sc_list.append(V)

        file_name = "./tmp_file/_tmp_sc_list_new_clsd_raw_%d.pickle" % (i)
        sc_file = open(file_name, 'wb')
        cPickle.dump(sc_list, sc_file, 1)
        sc_file.close()

    return sc_list
# SPARSITY ON IMAGENET
# SHUFFLING
ind = list(range(len(imagenet_targets)))
np.random.shuffle(ind)
imagenet_targets = imagenet_targets[ind]
imagenet_features = imagenet_features[ind, :]

# Dictionary Learning on Source
sparse_components = 200
dict_sparse = MiniBatchDictionaryLearning(alpha=1,
                                          n_components=sparse_components,
                                          verbose=3,
                                          batch_size=10,
                                          n_iter=200)
dict_sparse.fit(imagenet_features)
Ds_0 = dict_sparse.components_

coder = SparseCoder(dictionary=Ds_0)
Rs_0 = coder.transform(imagenet_features)

# classification using sparse features
from sklearn import cross_validation, grid_search

model = OneVsRestClassifier(LinearSVC(random_state=0))
parameters = {'estimator__C': [0.01, 0.1, 1, 10]}
clf = grid_search.GridSearchCV(model, parameters, score_func=accuracy_score)
scores = cross_validation.cross_val_score(clf, Rs_0, imagenet_targets, cv=10)

#
patch_size = (m, m)

patches = extract_patches_2d(img1_gray_re, patch_size)
patches = patches.reshape(patches.shape[0], -1)
# remove the mean value and do the normalisation
patches -= np.mean(patches, axis=0)
patches /= np.std(patches, axis=0)
print('done in %.2fs.' % (time() - t0))
print(patches.shape)

# Learn the dictionary from reference patches

print('Learning the dictionary...')
t0 = time()
dico = MiniBatchDictionaryLearning(n_components=200, alpha=0.5, n_iter=400) #TODO:check with different parameters
V = dico.fit(patches).components_
dt = time() - t0
print('done in %.2fs.' % dt)

# show the learned dictionary as patches
plt.figure(figsize=(6, 6))
for i, comp in enumerate(V[:100]): # we show the 100 first patches
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,interpolation='nearest')
    plt.xticks(())
    plt.yticks(())
plt.suptitle('Dictionary learned from patches\n' + 'Train time %.1fs on %d patches' % (dt, len(patches)), fontsize=16)
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)


########################################
def test_dict_learning_online_estimator_shapes():
    n_components = 5
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, random_state=0)
    dico.fit(X)
    assert_true(dico.components_.shape == (n_components, n_features))
Example #17
def learn_basis_from_unlabeled_data(unlabeled_examples, num_components, alpha, max_iter):
    dic = MiniBatchDictionaryLearning(n_components=num_components, alpha=alpha, n_iter=max_iter)
    return dic.fit(unlabeled_examples).components_
class SparseApproxSpectrum(object):
    """class for 2D patch analysis of audio files
    initialization:
        patch_size - size of time-frequency 2D patches in spectrogram units (freq,time) [(12,12)]
        max_samples - if num audio patches exceeds this threshold, randomly sample spectrum [1000000]
        **omp_args - keyword arguments to OrthogonalMatchingPursuit(...) [None]
    """
    def __init__(self, patch_size=(12,12), max_samples=1000000, **omp_args):
        self.patch_size = patch_size
        self.max_samples = max_samples
        self.omp = OrthogonalMatchingPursuit(**omp_args)
        self.D = None
        self.data = None
        self.components = None
        self.zscore=False
        self.log_amplitude=False

    def _extract_data_patches(self, X, zscore, log_amplitude):
    	"utility method for converting spectrogram data to 2D patches "
        self.zscore=zscore
        self.log_amplitude=log_amplitude
        self.X = X
        if self.log_amplitude:
            X = np.log(1+X)
        data = extract_patches_2d(X, self.patch_size)
        data = data.reshape(data.shape[0], -1)
        if len(data)>self.max_samples:
            data = np.random.permutation(data)[:self.max_samples]
        print(data.shape)
        if self.zscore:
            self.mn = np.mean(data, axis=0) 
            self.std = np.std(data, axis=0)
            data -= self.mn
            data /= self.std
        self.data = data

    def make_gabor_field(self, X, zscore=True, log_amplitude=True, thetas=range(4),
                         sigmas=(1,3), frequencies=(0.05, 0.25)):
        """Given a spectrogram, prepare 2D patches and Gabor filter bank kernels
        inputs:
           X - spectrogram data (frequency x time)
           zscore - whether to zscore the ensemble of 2D patches [True]
           log_amplitude - whether to apply log(1+X) scaling of spectrogram data [True]
           thetas - list of 2D Gabor filter orientations in units of pi/4. [range(4)]
           sigmas - list of 2D Gabor filter standard deviations in oriented direction [(1,3)]
           frequencies - list of 2D Gabor filter frequencies [(0.05,0.25)]
        outputs:
           self.data - 2D patches of input spectrogram
           self.D.components_ - Gabor dictionary of thetas x sigmas x frequencies atoms
        """
        self._extract_data_patches(X, zscore, log_amplitude)
        self.n_components = len(thetas)*len(sigmas)*len(frequencies)
        self.thetas = thetas
        self.sigmas = sigmas
        self.frequencies = frequencies
        a,b = self.patch_size
        self.kernels = []
        for theta in thetas:
            theta = theta / 4. * np.pi
            for sigma in sigmas:
                for frequency in frequencies:
                    kernel = np.real(gabor_kernel(frequency, theta=theta,
                                                  sigma_x=sigma, sigma_y=sigma))
                    c,d = kernel.shape
                    if c<=a:
                        z = np.zeros(self.patch_size)
                        z[(a//2-c//2):(a//2-c//2+c),(b//2-d//2):(b//2-d//2+d)] = kernel
                    else:
                        z = kernel[(c//2-a//2):(c//2-a//2+a),(d//2-b//2):(d//2-b//2+b)]
                    self.kernels.append(z.flatten())
        class Bunch:
            def __init__(self, **kwds):
                self.__dict__.update(kwds)
        self.D = Bunch(components_ = np.vstack(self.kernels))

    def extract_codes(self, X, n_components=16, zscore=True, log_amplitude=True, **mbl_args):
    	"""Given a spectrogram, learn a dictionary of 2D patch atoms from spectrogram data
        inputs:
            X - spectrogram data (frequency x time)
    	    n_components - how many components to extract [16]
            zscore - whether to zscore the ensemble of 2D patches [True]
            log_amplitude - whether to apply log(1+X) scaling of spectrogram data [True]
            **mbl_args - keyword arguments for MiniBatchDictionaryLearning.fit(...) [None]
        outputs:
            self.data - 2D patches of input spectrogram
            self.D.components_ - dictionary of learned 2D atoms for sparse coding
        """
        self._extract_data_patches(X, zscore, log_amplitude)
        self.n_components = n_components
        self.dico = MiniBatchDictionaryLearning(n_components=self.n_components, **mbl_args)
        print "Dictionary learning from data..."
        self.D = self.dico.fit(self.data)

    def plot_codes(self, cbar=False, show_axis=False, **kwargs):
        "plot the learned or generated 2D sparse code dictionary"
        N = int(np.ceil(np.sqrt(self.n_components)))
        kwargs.setdefault('cmap', plt.cm.gray_r)
        kwargs.setdefault('origin','lower')
        kwargs.setdefault('interpolation','nearest')
        for i, comp in enumerate(self.D.components_):
            plt.subplot(N, N, i+1)
            plt.imshow(comp.reshape(self.patch_size), **kwargs)
            if cbar:
                plt.colorbar()
            if not show_axis:
                plt.axis('off')
            plt.xticks(())
            plt.yticks(())
            plt.title('%d'%(i))
        plt.suptitle('Dictionary of Spectrum Patches\n', fontsize=14)
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def extract_audio_dir_codes(self, dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav', **mbl_args):
    	"""apply dictionary learning to entire directory of audio files (requires LOTS of RAM)
            inputs:
                **mbl_args - keyword arguments for MiniBatchDictionaryLearning.fit(...) [None]
        """
        flist=glob.glob(dir_expr)
        self.X = np.vstack([br.feature_scale(br.LogFrequencySpectrum(f, nbpo=24, nhop=1024).X,normalize=1).T for f in flist]).T
        self.extract_codes(self.X, **mbl_args)

    def _get_approximation_coefs(self, data, components):
        """utility function to fit dictionary components to data
        inputs:
            data - spectrogram data (frequency x time) [None]
            components - the dictionary components to fit to the data [None]
        """
        w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data])
        return w

    def reconstruct_spectrum(self, w=None, randomize=False):
    	"""reconstruct by fitting current 2D dictionary to self.data 
        inputs:
            w - per-component reconstruction weights [None=calculate weights]
            randomize - randomly permute components after getting weights [False]
        returns:
            self.X_hat - spectral reconstruction of self.data
        """
        data = self.data
        components = self.D.components_
        if w is None:
            self.w = self._get_approximation_coefs(data, components)
            w = self.w
        if randomize:
            components = np.random.permutation(components)
        recon = np.dot(w, components)
        if self.zscore:
            recon = recon * self.std
            recon = recon + self.mn
        recon = recon.reshape(-1, *self.patch_size)
        self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape)
        if self.log_amplitude:
            self.X_hat = np.exp(self.X_hat) - 1.0 # invert log transform

    def reconstruct_individual_spectra(self, w=None, randomize=False, plotting=False, rectify=True, **kwargs):
    	"""fit each dictionary component to self.data
        inputs:
            w - per-component reconstruction weights [None=calculate weights]
            randomize - randomly permute components after getting weights [False]
            plotting - whether to subplot individual spectrum reconstructions [True]
            rectify- remove negative ("dark energy") from individual reconstructions [True]
            **kwargs - keyword arguments for plotting
        returns:
            self.X_hat_l - list of individual spectrum reconstructions per dictionary atom
        """
        omp_args = {}
        self.reconstruct_spectrum(w, randomize, **omp_args)
        w, components = self.w, self.D.components_
        self.X_hat_l = []
        for i in range(len(self.w.T)):
            r = np.array((np.matrix(w)[:,i]*np.matrix(components)[i,:])).reshape(-1,*self.patch_size)
            X_hat = reconstruct_from_patches_2d(r, self.X.shape)
            if self.log_amplitude:
                X_hat = np.exp(X_hat) - 1.0
            if rectify: # half wave rectification
                X_hat[X_hat<0] = 0
            self.X_hat_l.append(X_hat)
        if plotting:
            self.plot_individual_spectra(**kwargs)

    def plot_individual_spectra(self, **kwargs):
        "plot individual spectrum reconstructions for self.X_hat_l"
        if self.X_hat_l is None: return
        plt.figure()
        rn = np.ceil(self.n_components**0.5)
        for k in range(self.n_components):
            plt.subplot(rn,rn,k+1)
            br.feature_plot(self.X_hat_l[k], nofig=1, **kwargs)
            plt.title('%d'%(k))
        plt.suptitle('Component Reconstructions\n', fontsize=14)
Example #19
def reconstruct_events(event_folder=None,
                       output_folder=None,
                       dictionary=None,
                       components=100,
                       alpha=1,
                       start=0,
                       end=np.inf,
                       actions=[],
                       random_state=None):
    if event_folder is None:
        event_folder = os.path.abspath(
            '/share/storage/vision/subway/features/')

    if output_folder is None:
        output_folder = os.path.abspath(
            '/share/storage/vision/subway/reconstructed/')

    events_files = get_events_files(event_folder)

    event_counter = 0

    results = []

    # Check which actions occur on each event
    for pth, events_start, events_end in events_files:
        data = sio.loadmat(pth)
        events = data["events"]

        # Avoid calculating files outside the window of interest
        if events_end <= start or events_start >= end:
            print "Skipping %s" % pth
            continue
        else:
            print "Processing %s" % pth

        intercept = None
        deviation = None
        dico = MiniBatchDictionaryLearning(n_components=components,
                                           alpha=alpha,
                                           n_iter=100)

        for event in events:
            (
                (x, ), (y, ), (t, )
            ), event_cuboids, event_descriptors, event_cuboid_locations, event_adjacency = event
            if (t >= start - 40) and (t <= end + 40):
                sort_order = np.argsort(event_descriptors[:, 2])
                X = event_descriptors[sort_order, :]

                if intercept is None:
                    intercept = np.mean(X, axis=0)

                original = X - intercept

                if deviation is None:
                    deviation = np.std(original, axis=0)

                original /= deviation

                dictionary = dico.fit(original).components_

                dico.set_params(transform_algorithm='lars',
                                transform_n_nonzero_coefs=5)

                code = dico.transform(original)
                error = (original - np.dot(code, dictionary))**2

                results.append(((x, y, t), code, error))

                event_counter += 1

    result_pth = os.path.join(
        output_folder, "reconstructed_events_clip-%s-%s.mat" % (start, end))

    sio.savemat(result_pth, {
        'results': results,
        'start': start,
        'end': end,
        'actions': actions
    })
    print "%s events saved in '%s'" % (event_counter, result_pth)
    return dictionary
Example #20
def sp_deepdictionarylearning(s_p_d, i_p_d, dl_lambda1, dl_lambda2):
    sp_patches_data = np.copy(s_p_d)
    images_patches_data = np.copy(i_p_d)
    index = 0  # image index
    sp_mean = np.mean(sp_patches_data, axis=0)  # keep the mean so it can be added back later
    sp_patches_data -= sp_mean
    dico1 = MiniBatchDictionaryLearning(n_components=144,
                                        alpha=dl_lambda1,
                                        n_iter=200)
    V1 = dico1.fit(sp_patches_data).components_  # (144, 64)
    print('dictionary1 shape : ', V1.shape)
    transform_algorithms = [(('Orthogonal Matching Pursuit\n7 atoms', 'omp', {
        'transform_n_nonzero_coefs': 7
    }), ('Orthogonal Matching Pursuit\n7 atoms', 'omp', {
        'transform_n_nonzero_coefs': 7
    }))]
    # title, transform_algorithm, kwargs
    remove_files('Image_Salt_and_Pepper_DeepDictionaryLearning')
    for layer1, layer2 in transform_algorithms:
        dico1.set_params(transform_algorithm=layer1[1], **layer1[2])
        code1 = dico1.transform(sp_patches_data)
        # activation function (currently disabled)
        # code1 = sigmoid(code1)
        # code1 = relu_reverse_2(code1)
        dico2 = MiniBatchDictionaryLearning(n_components=256,
                                            alpha=dl_lambda2,
                                            n_iter=200)
        V2 = dico2.fit(code1).components_
        print('dictionary2 shape : ', V2.shape)
        dico2.set_params(transform_algorithm=layer2[1], **layer2[2])
        code2 = dico2.transform(code1)
        # inverse activation function
        # patches = np.dot(np.dot(code2, V2), V1)
        patches = np.dot(np.dot(code2, V2), V1)

        patches += sp_mean

        # reshape patches from (62001, 64) back to (62001, 8, 8)
        patches = patches.reshape(len(sp_patches_data), *(8, 8))

        if layer1[1] == 'threshold':
            patches -= patches.min()
            patches /= patches.max()

        # stitch the patches back into an image with reconstruct_from_patches_2d
        reconstruction_image = reconstruct_from_patches_2d(patches, (256, 256))

        # compute the error between the reconstruction and the original image
        psnr_score = psnr(reconstruct_from_patches_2d(
            images_patches_data.reshape(len(images_patches_data), *(8, 8)),
            (256, 256)),
                          reconstruction_image,
                          PIXEL_MAX=1)

        plt.figure()
        plt.imshow(reconstruction_image, cmap='gray')
        plt.title('Transform algorithm: ' + layer1[0] + '\npsnr_score : ' +
                  str(psnr_score))
        plt.show()
        # save the denoised reconstruction
        index += 1
        cv2.imwrite(
            'Image_Salt_and_Pepper_DeepDictionaryLearning\\' + 'algorithms_' +
            str(index) + '_psnr_score_' +
            str(round(psnr_score, 2)).replace('.', '__') + '.jpg',
            reconstruction_image * 255)
Example #21
def sp_single_layer_dictionarylearning(s_p_d, i_p_d, dl_lambda):
    sp_patches_data = np.copy(s_p_d)
    images_patches_data = np.copy(i_p_d)
    index = 0  # image index
    print('Extracting a dictionary from the salt-and-pepper noise image...')

    # Train the dictionary on the salt-and-pepper noise patches.
    # Subtract the per-feature mean from each row (z-score-style standardization).
    sp_mean = np.mean(sp_patches_data, axis=0)  # keep the mean so it can be added back later
    sp_patches_data -= sp_mean

    # Initialize MiniBatchDictionaryLearning with the given parameters.
    dico = MiniBatchDictionaryLearning(n_components=256,
                                       alpha=dl_lambda,
                                       n_iter=200)
    V = dico.fit(sp_patches_data).components_

    # Plot the dictionary atoms in V.
    '''figsize sets the figure size: 8.2 inches wide and 8 inches tall.'''
    plt.figure(figsize=(8.2, 8))
    # Loop over the 256 atoms in the dictionary V (n_components is the number of atoms).
    '''enumerate() pairs each element of an iterable (such as a list, tuple, or
    string) with its index, and is typically used in a for loop.'''
    for i, comp in enumerate(V[:256]):
        plt.subplot(16, 16, i + 1)
        plt.imshow(comp.reshape((8, 8)),
                   cmap=plt.cm.gray_r,
                   interpolation='nearest')
        plt.xticks(())
        plt.yticks(())

    # The six positional arguments are: left, bottom, right, top, wspace, hspace.
    plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08,
                        0.23)
    plt.show()
    print('dictionary shape : ', V.shape)
    print('Dictionary learned on %d patches' % (len(sp_patches_data)))

    print('Finished extracting the dictionary from the noisy image...')

    print('Starting the sparse representation of the noisy image...')

    # PSNR scores between the reconstructions and the original image
    differents = []

    # dictionary coding (transform) strategies to compare
    transform_algorithms = [('Orthogonal Matching Pursuit\n7 atoms', 'omp', {
        'transform_n_nonzero_coefs': 7
    })]

    # Clear earlier files from this folder.
    remove_files('Image_Salt_and_Pepper_SingleLayer_DictionaryLearning')
    for title, transform_algorithm, kwargs in transform_algorithms:
        # Configure the transform (coding) stage via set_params.
        dico.set_params(transform_algorithm=transform_algorithm, **kwargs)
        # transform() sparse-codes the data with the configured model and stores the result in code.
        # code has one column per dictionary atom in V; sparsity means most entries
        # in each row are zero, so each patch is rebuilt from as few atoms as possible.
        code = dico.transform(sp_patches_data)
        # Multiplying code by V reconstructs the patch matrix.
        patches = np.dot(code, V)

        # Undo the preprocessing (add the mean back).
        patches += sp_mean

        # reshape patches from (62001, 64) back to (62001, 8, 8)
        patches = patches.reshape(len(sp_patches_data), *(8, 8))

        if transform_algorithm == 'threshold':
            patches -= patches.min()
            patches /= patches.max()

        # stitch the patches back into an image with reconstruct_from_patches_2d
        reconstruction_image = reconstruct_from_patches_2d(patches, (256, 256))

        # compute the error between the reconstruction and the original image
        psnr_score = psnr(reconstruct_from_patches_2d(
            images_patches_data.reshape(len(images_patches_data), *(8, 8)),
            (256, 256)),
                          reconstruction_image,
                          PIXEL_MAX=1)

        differents.append(psnr_score)

        plt.figure()
        plt.imshow(reconstruction_image, cmap='gray')
        plt.title('Transform algorithm: ' + title + '\npsnr_score : ' + str(psnr_score))
        plt.show()
        # save the denoised reconstruction
        index += 1
        cv2.imwrite(
            'Image_Salt_and_Pepper_SingleLayer_DictionaryLearning\\' +
            'algorithms_' + str(index) + '_psnr_score_' +
            str(round(psnr_score, 2)).replace('.', '__') + '.jpg',
            reconstruction_image * 255)

    print('Finished the sparse representation of the noisy image...')
Example #22
class Layer(object):
    def __init__(self, hierarchy, depth, patch_size, num_features, num_patches,
                 multiplier):
        """
         * depth - hierarchy level (1, 2, 3, etc.)
         * patch_size - number of pixels representing side of the square patch.
           like, 8 (8x8 patches)
         * num_features - how many components to learn
         * multiplier - num of subpatches we break patch into
           (0 for the first level). if 3, patch will contain 3x3 subpatches.
        """
        self.hierarchy = hierarchy
        self.depth = depth
        self.basement_size = patch_size
        self.num_features = num_features
        self.num_patches = num_patches
        self.multiplier = multiplier
        self.learning = MiniBatchDictionaryLearning(
            n_components=num_features,
            n_iter=3000,
            transform_algorithm='lasso_lars',
            transform_alpha=0.5,
            n_jobs=2)
        self.ready = False

    def get_data(self, data, max_patches=None):
        """
        Extracts raw data from patches.
        """
        max_patches = max_patches or self.num_patches
        if isinstance(data, np.ndarray):
            # one image
            patches = extract_patches_2d(
                data, (self.basement_size, self.basement_size),
                max_patches=max_patches)
        else:
            patches = []
            # multiple images
            for i in range(max_patches):
                idx = np.random.randint(len(data))  # selecting random image
                dx = dy = self.basement_size
                if data[idx].shape[0] <= dx or data[idx].shape[1] <= dy:
                    continue
                x = np.random.randint(data[idx].shape[0] - dx)
                y = np.random.randint(data[idx].shape[1] - dy)
                patch = data[idx][x:x + dx, y:y + dy]
                patches.append(patch.reshape(-1))
            patches = np.vstack(patches)
            patches = patches.reshape(patches.shape[0], self.basement_size,
                                      self.basement_size)
        print('patches', patches.shape)
        patches = preprocessing.scale(patches)
        return patches

    def learn(self, data):
        data = data.reshape(data.shape[0], -1)
        self.learning.fit(data)
        self.ready = True

    @property
    def output_size(self):
        return int(np.sqrt(self.num_features))

    @property
    def input_size(self):
        if self.depth == 0:
            return self.basement_size
        else:
            prev_layer = self.hierarchy.layers[self.depth - 1]
            r = prev_layer.output_size * self.multiplier
            return r

    @property
    def features(self):
        return self.learning.components_

    # def get_features(self):
    #     # going from up to down
    #     result = []
    #     layers = self.hierarchy.layers[: self.depth][::-1]
    #     if self.depth == 0:
    #         return self.features

    #     previous_layer = self.hierarchy.layers[self.depth - 1]
    #     for feature in self.features:
    #         multiplier = self.multiplier
    #         feature = feature.reshape(self.multiplier * previous_layer.output_size,
    #                                   self.multiplier * previous_layer.output_size,)
    #         for other_layer in layers:
    #             expressed_feature = np.empty((multiplier * other_layer.input_size,
    #                                           multiplier * other_layer.input_size))
    #             enc_n = other_layer.output_size
    #             n = other_layer.input_size
    #             for dx in range(multiplier):
    #                 for dy in range(multiplier):
    #                     encoded_subfeature = feature[dx * enc_n: (dx + 1) * enc_n,
    #                                                  dy * enc_n: (dy + 1) * enc_n]
    #                     prev_patch = np.dot(encoded_subfeature.reshape(-1), other_layer.features)
    #                     expressed_feature[dx * n: (dx + 1) * n, dy * n: (dy + 1) * n] = prev_patch.reshape(n, n)
    #             feature = expressed_feature
    #             multiplier *= other_layer.multiplier
    #         result.append(expressed_feature.reshape(-1))
    #     result = np.vstack(result)
    #     return result

    def get_features(self):
        # going from down to up; these two methods look essentially the same
        if self.depth == 0:
            return self.features
        layers = self.hierarchy.layers[1:self.depth + 1]  # down --> up
        features = self.hierarchy.layers[
            0].features  # to express upper feature

        for i, layer in enumerate(layers, start=1):
            previous_layer = self.hierarchy.layers[i - 1]
            expressed_features = []
            for feature in layer.features:
                n = previous_layer.output_size
                m = int(np.sqrt(features.shape[1]))
                feature = feature.reshape((layer.input_size, layer.input_size))
                expressed_feature = np.empty(
                    (layer.multiplier * m, layer.multiplier * m))
                for dx in range(layer.multiplier):
                    for dy in range(layer.multiplier):
                        subfeature = feature[dx * n:(dx + 1) * n,
                                             dy * n:(dy + 1) * n]
                        # now that's previous_layer's code. replace it with reconstruction
                        expressed_subfeature = np.dot(subfeature.reshape(-1),
                                                      features)
                        expressed_feature[dx * m:(dx + 1) * m,
                                          dy * m:(dy + 1) *
                                          m] = expressed_subfeature.reshape(
                                              (m, m))
                expressed_features.append(expressed_feature.reshape(-1))
            features = np.vstack(expressed_features)
        return features
Example #23
    pca.fit(imagenet_features)
    pca_feat = pca.transform(imagenet_features)

    # Shuffling
    ind = list(range(len(imagenet_targets)))
    np.random.shuffle(ind)
    imagenet_targets = imagenet_targets[ind]
    pca_feat = pca_feat[ind, :]

    # Dictionary Learning on Source
    dict_sparse = MiniBatchDictionaryLearning(alpha=1,
                                              n_components=300,
                                              verbose=3,
                                              batch_size=10,
                                              n_iter=1000)
    dict_sparse.fit(pca_feat)
    Ds_0 = dict_sparse.components_

    # Dictionary Learning on Target
    dict_sparse = DictionaryLearning(alpha=1,
                                     n_components=300,
                                     max_iter=3,
                                     verbose=3)
    dict_sparse.fit(features)
    Dt_0 = dict_sparse.components_
    coder = SparseCoder(dictionary=Dt_0)
    Rt_0 = coder.transform(features)

    # Target Reconstruction
    Xt_1 = np.mat(Rt_0) * np.mat(Ds_0)
    dict_sparse = DictionaryLearning(alpha=1,
Example #24
class Sparsecode(BaseEstimator, TransformerMixin):
    def __init__(self, patch_file=None, patch_num=10000, patch_size=(16, 16),\
                n_components=384,  alpha = 1, n_iter=1000, batch_size=200):
        self.patch_num = patch_num
        self.patch_size = patch_size
        self.patch_file = patch_file
        
        self.n_components = n_components
        self.alpha = alpha #sparsity controlling parameter
        self.n_iter = n_iter
        self.batch_size = batch_size

    
    def fit(self, X=None, y=None):
        if self.patch_file is None:
            num = self.patch_num // X.size
            data = []
            for item in X:
                img = imread(str(item[0]))
                img = img_as_ubyte(rgb2gray(img))
                #img = self.binary(img) # binarization
                tmp = extract_patches_2d(img, self.patch_size, max_patches = num,\
                                        random_state=np.random.RandomState())
                data.append(tmp)
            
            data = np.vstack(data)
            data = data.reshape(data.shape[0], -1)
            data = np.asarray(data, 'float32')
        else:
            data = np.load(self.patch_file,'r+') # load npy file; note the mmap mode, since the data is modified later
        
        data = np.require(data, dtype=np.float32)
        
        # Standardization
        #logging.info("Pre-processing : Standardization...")
        #self.standard = StandardScaler()
        #data = self.standard.fit_transform(data)
            
        # whiten
        #logging.info("Pre-processing : PCA Whiten...")
        #self.pca = RandomizedPCA(copy=True, whiten=True)
        #data = self.pca.fit_transform(data)
        
        # whiten
        logging.info("Pre-processing : ZCA Whiten...")
        self.zca = ZCA()
        data = self.zca.fit_transform(data)
        
        # 0-1 scaling can also be done with the preprocessing module
        #self.minmax = MinMaxScaler()
        #data = self.minmax.fit_transform(data)
        
        """k-means
        self.kmeans = MiniBatchKMeans(n_clusters=self.n_components, init='k-means++', \
                                    max_iter=self.n_iter, batch_size=self.batch_size, verbose=1,\
                                    tol=0.0, max_no_improvement=100,\
                                    init_size=None, n_init=3, random_state=np.random.RandomState(0),\
                                    reassignment_ratio=0.0001)
        logging.info("Sparse coding : Phase 1 - Codebook learning (K-means).")
        self.kmeans.fit(data)
        
        logging.info("Sparse coding : Phase 2 - Define coding method (omp,lars...).")
        self.coder = SparseCoder(dictionary=self.kmeans.cluster_centers_, 
                                 transform_n_nonzero_coefs=256,
                                 transform_alpha=None, 
                                 transform_algorithm='lasso_lars',
                                 n_jobs = 1)
        """
        #'''generic
        logging.info("Sparse coding...")
        self.coder = MiniBatchDictionaryLearning(n_components=self.n_components, \
                                           alpha=self.alpha, n_iter=self.n_iter, \
                                           batch_size =self.batch_size, verbose=True)
        self.coder.fit(data)
        self.coder.transform_algorithm = 'omp'
        self.coder.transform_alpha = 0.1 # for omp, this is the reconstruction error tolerance
        #'''
        return self
    
    def transform(self, X):
        #whiten
        #X_whiten = self.pca.transform(X)
        logging.info("Compute the sparse coding of X.")
        X = np.require(X, dtype=np.float32)
        
        #TODO: must fit be called before transform?
        #X = self.minmax.fit_transform(X)
        
        # -mean/std and whiten
        #X = self.standard.transform(X)
        #X = self.pca.transform(X)
        
        # ZCA
        X = self.zca.transform(X)

        # MiniBatchDictionaryLearning
        # return self.dico.transform(X_whiten)
        
        # k-means
        # TODO: sparse coder method? problem...
        return self.coder.transform(X)
        
    
    def get_params(self, deep=True):
        return {"patch_num": self.patch_num,
                "patch_size":self.patch_size,
                "alpha":self.alpha,
                "n_components":self.n_components,
                "n_iter":self.n_iter,
                "batch_size":self.batch_size}
                
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            self.__setattr__(parameter, value)
        return self
def get_dictionary_data(n_comp=20, zero_index=True):
    unlabeled = util.load_unlabeled_training(flatten=False)
    height, width = 32, 32
    n_images = 10000
    patch_size = (8, 8)

    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')

    patches = np.empty((0, 64))
    t0 = time()

    for image in unlabeled[:n_images, :, :]:
        data = np.array(extract_patches_2d(image, patch_size, max_patches=0.10))
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20
        patches = np.concatenate([patches, data])

    print('done in %.2fs.' % (time() - t0))

    # whiten the patches
    z = zca.ZCA()
    z.fit(patches)
    z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False, zero_index=True)
    labeled_data = util.standardize(labeled_data)

    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)

    #util.render_matrix(test_data, flattened=False)

    print('Training SVM with the training images...')
    t0 = time()
    reconstructed_images = np.empty((0, 64))
    multiplied_labels = np.empty((0))

    for i in range(len(labeled_data)):
        image = labeled_data[i, :, :]
        label = labels[i]
        data = extract_patches_2d(image, patch_size, max_patches=0.50)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        z.transform(patches)

        reconstructed_images = np.concatenate([reconstructed_images, patches])
        extended_labels = np.asarray([label] * len(patches))
        multiplied_labels = np.concatenate([multiplied_labels, extended_labels])

    print(reconstructed_images.shape, multiplied_labels.shape)
    svc = SVC()
    #print('Getting cross-val scores...')
    #scores = cross_validation.cross_val_score(svc, reconstructed_images, multiplied_labels, cv=10)
    #print('cross-val scores:', scores)
    #print('cross-val mean:', np.mean(scores))
    #print('cross-val variance:', np.var(scores))

    print('done in %.2fs.' % (time() - t0))

    svc.fit(reconstructed_images, multiplied_labels)

    print('Reconstructing the test images...')
    t0 = time()

    predictions = []

    for i, image in enumerate(test_data):
        data = extract_patches_2d(image, patch_size, max_patches=0.25)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        z.transform(patches)

        pred = svc.predict(patches)
        print('Variance in the predictions:', np.var(pred))
        predictions.append(int(mode(pred)[0][0]))  # modal class over this image's patches

    print('done in %.2fs.' % (time() - t0))

    predictions = np.asarray(predictions) + 1
    util.write_results(predictions, 'svm_patches_25_percent_20_comp.csv')
def plot_image_denoising():
    try:  # SciPy >= 0.16 have face in misc
        from scipy.misc import face
        face = face(gray=True)
    except ImportError:
        face = sp.face(gray=True)

    # Convert from uint8 representation with values between 0 and 255 to
    # a floating point representation with values between 0 and 1.
    face = face / 255.

    # downsample for higher speed
    face = face[::4, ::4] + face[1::4, ::4] + face[::4, 1::4] + face[1::4,
                                                                     1::4]
    face /= 4.0
    height, width = face.shape

    # Distort the right half of the image
    print('Distorting image...')
    distorted = face.copy()
    distorted[:, width // 2:] += 0.075 * np.random.randn(height, width // 2)

    # Extract all reference patches from the left half of the image
    print('Extracting reference patches...')
    t0 = time()
    patch_size = (7, 7)
    data = extract_patches_2d(distorted[:, :width // 2], patch_size)
    data = data.reshape(data.shape[0], -1)
    data -= np.mean(data, axis=0)
    data /= np.std(data, axis=0)
    print('done in %.2fs.' % (time() - t0))

    # #############################################################################
    # Learn the dictionary from reference patches

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=500)
    V = dico.fit(data).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    plt.figure(figsize=(4.2, 4))
    for i, comp in enumerate(V[:100]):
        plt.subplot(10, 10, i + 1)
        plt.imshow(comp.reshape(patch_size),
                   cmap=plt.cm.gray_r,
                   interpolation='nearest')
        plt.xticks(())
        plt.yticks(())
    plt.suptitle('Dictionary learned from face patches\n' +
                 'Train time %.1fs on %d patches' % (dt, len(data)),
                 fontsize=16)
    plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    # #############################################################################
    # Display the distorted image

    def show_with_diff(image, reference, title):
        """Helper function to display denoising"""
        plt.figure(figsize=(5, 3.3))
        plt.subplot(1, 2, 1)
        plt.title('Image')
        plt.imshow(image,
                   vmin=0,
                   vmax=1,
                   cmap=plt.cm.gray,
                   interpolation='nearest')
        plt.xticks(())
        plt.yticks(())
        plt.subplot(1, 2, 2)
        difference = image - reference

        plt.title('Difference (norm: %.2f)' % np.sqrt(np.sum(difference**2)))
        plt.imshow(difference,
                   vmin=-0.5,
                   vmax=0.5,
                   cmap=plt.cm.PuOr,
                   interpolation='nearest')
        plt.xticks(())
        plt.yticks(())
        plt.suptitle(title, size=16)
        plt.subplots_adjust(0.02, 0.02, 0.98, 0.79, 0.02, 0.2)

    show_with_diff(distorted, face, 'Distorted image')

    # #############################################################################
    # Extract noisy patches and reconstruct them using the dictionary

    print('Extracting noisy patches... ')
    t0 = time()
    data = extract_patches_2d(distorted[:, width // 2:], patch_size)
    data = data.reshape(data.shape[0], -1)
    intercept = np.mean(data, axis=0)
    data -= intercept
    print('done in %.2fs.' % (time() - t0))

    transform_algorithms = [('Orthogonal Matching Pursuit\n1 atom', 'omp', {
        'transform_n_nonzero_coefs': 1
    }),
                            ('Orthogonal Matching Pursuit\n2 atoms', 'omp', {
                                'transform_n_nonzero_coefs': 2
                            }),
                            ('Least-angle regression\n5 atoms', 'lars', {
                                'transform_n_nonzero_coefs': 5
                            }),
                            ('Thresholding\n alpha=0.1', 'threshold', {
                                'transform_alpha': .1
                            })]

    reconstructions = {}
    for title, transform_algorithm, kwargs in transform_algorithms:
        print(title + '...')
        reconstructions[title] = face.copy()
        t0 = time()
        dico.set_params(transform_algorithm=transform_algorithm, **kwargs)
        code = dico.transform(data)
        patches = np.dot(code, V)

        patches += intercept
        patches = patches.reshape(len(data), *patch_size)
        if transform_algorithm == 'threshold':
            patches -= patches.min()
            patches /= patches.max()
        reconstructions[title][:, width // 2:] = reconstruct_from_patches_2d(
            patches, (height, width // 2))
        dt = time() - t0
        print('done in %.2fs.' % dt)
        show_with_diff(reconstructions[title], face,
                       title + ' (time: %.1fs)' % dt)

    plt.show()
Example #27
        data = list(
            numpy.array(patch, numpy.float32).flatten()
            for patch in patches)
        data = numpy.array(data)
        data /= 256.
        mean = numpy.mean(data, axis=0)
        data -= mean
        std = numpy.std(data, axis=0)
        data /= std
if (not restart) or steps[restart] <= steps['FIT_MODEL']:
    with Timer("Fitting model ..."):
        # Fit the sparse model using Dictionary Learning.
        cols = ceil(sqrt(N_COMP))
        rows = ceil(N_COMP / float(cols))
        model = MiniBatchDictionaryLearning(n_components=N_COMP, alpha=1)
        fit = model.fit(data)
if (not restart) or steps[restart] <= steps['DISPLAY_BASIS']:
    with Timer("Display components ..."):
        # Display the basis components (aka the dictionary).
        pylab.ion()
        pylab.show()
        display(fit)
if (not restart) or steps[restart] <= steps['COMPUTE_PROJ']:
    with Timer("Compute projection ..."):
        # Project the input patches onto the basis using Orthonormal Matching Pursuit with 2 components.
        model.set_params(transform_algorithm='omp',
                         transform_n_nonzero_coefs=N_ATOMS)
        # the intention is simply this:
        #     code = model.transform(data)
        # but we chunk it up and store it in a sparse matrix for efficiency
        code = []
Example #28
print('Extracting reference patches...')
t0 = time()
patch_size = (7, 7)
data = extract_patches_2d(distorted[:, :width // 2], patch_size)
data = data.reshape(data.shape[0], -1)
data -= np.mean(data, axis=0)
data /= np.std(data, axis=0)
print('done in %.2fs.' % (time() - t0))

###############################################################################
# Learn the dictionary from reference patches

print('Learning the dictionary...')
t0 = time()
dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=500)
V = dico.fit(data).components_
dt = time() - t0
print('done in %.2fs.' % dt)

plt.figure(figsize=(4.2, 4))
for i, comp in enumerate(V[:100]):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patch_size),
               cmap=plt.cm.gray_r,
               interpolation='nearest')
    plt.xticks(())
    plt.yticks(())
plt.suptitle('Dictionary learned from face patches\n' +
             'Train time %.1fs on %d patches' % (dt, len(data)),
             fontsize=16)
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
Example #29
    def peakmem_fit(self, params):
        estimator = MiniBatchDictionaryLearning(**self.dl_params)
        estimator.fit(self.data)
Example #30
class SparseApproxSpectrum(object):
    def __init__(self, n_components=49, patch_size=(8,8), max_samples=1000000, **kwargs):
        self.omp = OrthogonalMatchingPursuit()
        self.n_components = n_components
        self.patch_size = patch_size
        self.max_samples = max_samples
        self.D = None
        self.data = None
        self.components = None
        self.standardize=False

    def _extract_data_patches(self, X):
        self.X = X
        data = extract_patches_2d(X, self.patch_size)
        data = data.reshape(data.shape[0], -1)
        if len(data)>self.max_samples:
            data = np.random.permutation(data)[:self.max_samples]
        print(data.shape)
        if self.standardize:
            self.mn = np.mean(data, axis=0) 
            self.std = np.std(data, axis=0)
            data -= self.mn
            data /= self.std
        self.data = data

    def extract_codes(self, X, standardize=False):
        self.standardize=standardize
        self._extract_data_patches(X)
        self.dico = MiniBatchDictionaryLearning(n_components=self.n_components, alpha=1, n_iter=500)
        print("Dictionary learning from data...")
        self.D = self.dico.fit(self.data)
        return self

    def plot_codes(self, cbar=False, **kwargs):
        #plt.figure(figsize=(4.2, 4))
        N = int(np.ceil(np.sqrt(self.n_components)))
        kwargs.setdefault('cmap', plt.cm.gray_r)
        kwargs.setdefault('origin','lower')
        kwargs.setdefault('interpolation','nearest')
        for i, comp in enumerate(self.D.components_):
            plt.subplot(N, N, i + 1)
            comp  = comp * self.std + self.mn if self.standardize else comp
            plt.imshow(comp.reshape(self.patch_size), **kwargs)
            if cbar:
                plt.colorbar()
            plt.xticks(())
            plt.yticks(())
        plt.suptitle('Dictionary learned from spectrum patches\n', fontsize=16)
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def extract_audio_dir_codes(self, dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav',**kwargs):
        flist=glob.glob(dir_expr)
        self.X = np.vstack([feature_scale(LogFrequencySpectrum(f, nbpo=24, nhop=1024).X,normalize=1).T for f in flist]).T
        self.extract_codes(self.X, **kwargs)
        self.plot_codes(**kwargs)
        return self

    def _get_approximation_coefs(self,data, components):
        w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data])
        return w

    def reconstruct_spectrum(self, w=None, randomize=False):
        data = self.data
        components = self.D.components_
        if w is None:
            self.w = self._get_approximation_coefs(data, components)
            w = self.w
        if self.standardize:
            components = components * self.std + self.mn
        if randomize:
            components = np.random.permutation(components)
        recon = np.dot(w, components).reshape(-1,self.patch_size[0],self.patch_size[1])
        self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape)
        return self

    def reconstruct_individual_spectra(self, w=None, randomize=False, plotting=False, **kwargs):
        self.reconstruct_spectrum(w,randomize)
        w, components = self.w, self.D.components_
        self.X_hat_l = []
        for i in range(len(self.w.T)):
            r=np.array((np.matrix(w)[:,i]*np.matrix(components)[i,:])).reshape(-1,self.patch_size[0],self.patch_size[1])
            self.X_hat_l.append(reconstruct_from_patches_2d(r, self.X.shape))
        if plotting:
            plt.figure()            
            N = int(np.ceil(self.n_components**0.5))
            for k in range(self.n_components):
                plt.subplot(N, N, k + 1)
                feature_plot(self.X_hat_l[k],nofig=1,**kwargs)
        return self
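A minimal usage sketch for the class above (its class SparseApproxSpectrum(object): header sits just above this excerpt); the random array stands in for a real log-frequency spectrogram, and all sizes are illustrative.

import numpy as np

S = np.abs(np.random.randn(64, 128))              # fake frequency-by-time "spectrogram"
sas = SparseApproxSpectrum(n_components=16, patch_size=(8, 8))
sas.extract_codes(S)                              # learn 16 patch atoms from S
sas.reconstruct_spectrum()                        # OMP-fit the atoms back to the patches
print(sas.X_hat.shape)                            # reconstruction, same shape as S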
Example #31
def get_dictionary_data(n_comp=20, zero_index=False):
    unlabeled = util.load_unlabeled_training(flatten=False)
    height, width = 32, 32
    n_images = 10000
    patch_size = (8, 8)

    unlabeled = util.standardize(unlabeled)
    np.random.shuffle(unlabeled)

    print('Extracting reference patches...')

    patches = np.empty((0, 64))
    t0 = time()

    for image in unlabeled[:n_images, :, :]:
        data = np.array(extract_patches_2d(image, patch_size,
                                           max_patches=0.01))
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20
        patches = np.concatenate([patches, data])

    print('done in %.2fs.' % (time() - t0))

    # whiten the patches
    z = zca.ZCA()
    z.fit(patches)
    z.transform(patches)

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1)
    V = dico.fit(patches).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    #plt.figure(figsize=(4.2, 4))
    #for i, comp in enumerate(V[:100]):
    #    plt.subplot(10, 10, i + 1)
    #    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
    #               interpolation='nearest')
    #    plt.xticks(())
    #    plt.yticks(())
    #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
    #plt.show()

    labeled_data, labels = util.load_labeled_training(flatten=False,
                                                      zero_index=True)
    labeled_data = util.standardize(labeled_data)

    test_data = util.load_all_test(flatten=False)
    test_data = util.standardize(test_data)

    #util.render_matrix(test_data, flattened=False)

    print('Reconstructing the training images...')
    t0 = time()
    reconstructed_images = np.empty((0, 32, 32))

    for i, image in enumerate(labeled_data):
        data = extract_patches_2d(image, patch_size)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        z.transform(patches)
        patches = patches.reshape(len(data), *patch_size)

        data = reconstruct_from_patches_2d(patches, (width, height))
        data = data.reshape(1, 32, 32)
        reconstructed_images = np.concatenate([reconstructed_images, data])

    print('done in %.2fs.' % (time() - t0))

    # flatten
    n, x, y = reconstructed_images.shape
    training_images = reconstructed_images.reshape(
        reconstructed_images.shape[0],
        reconstructed_images.shape[1] * reconstructed_images.shape[2])
    assert training_images.shape == (n, x * y)

    print('Reconstructing the test images...')
    t0 = time()
    reconstructed_test_images = np.empty((0, 32, 32))

    for image in test_data:
        data = extract_patches_2d(image, patch_size)
        data = data.reshape(data.shape[0], -1)
        data -= np.mean(data, axis=0)
        data /= np.std(data, axis=0) + 1e-20

        code = dico.transform(data)
        patches = np.dot(code, V)
        z.transform(patches)
        patches = patches.reshape(len(data), *patch_size)

        data = reconstruct_from_patches_2d(patches, (width, height))
        data = data.reshape(1, 32, 32)
        reconstructed_test_images = np.concatenate(
            [reconstructed_test_images, data])

    print('done in %.2fs.' % (time() - t0))

    # flatten
    n, x, y = reconstructed_test_images.shape
    test_images = reconstructed_test_images.reshape(
        reconstructed_test_images.shape[0],
        reconstructed_test_images.shape[1] *
        reconstructed_test_images.shape[2])
    assert test_images.shape == (n, x * y)

    return (training_images, labels, test_images)
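The util and zca helpers above are project-local, so the function is not runnable on its own; the following self-contained sketch reproduces its core per-image step (learn a patch dictionary, encode, rebuild), with every name illustrative and the mean/std normalization omitted for brevity.

import numpy as np
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import extract_patches_2d, reconstruct_from_patches_2d

rng = np.random.RandomState(0)
image = rng.rand(32, 32)
patches = extract_patches_2d(image, (8, 8)).reshape(-1, 64)
dico = MiniBatchDictionaryLearning(n_components=20, alpha=1, n_iter=100)
V = dico.fit(patches).components_
code = dico.transform(patches)                        # sparse code per patch
rebuilt = reconstruct_from_patches_2d(np.dot(code, V).reshape(-1, 8, 8), (32, 32))
print(abs(rebuilt - image).mean())                    # average reconstruction error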
Example #32
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
In this script I check how good AR models are for AD.
Created on Wed Jul 25 09:11:44 2018

@author: haroonr
"""
import datetime

import matplotlib.pyplot as plt

#%%
from sklearn.decomposition import MiniBatchDictionaryLearning
dico = MiniBatchDictionaryLearning(n_components=220, alpha=1, n_iter=100)

train_data = days_obs.loc[datetime.date(2018,2,13):datetime.date(2018,2,28)]
D = dico.fit(train_data).components_

#%%
fig, ax = plt.subplots(ncols=10, nrows=D.shape[0] // 10)
i = 0
for row in ax:
    for col in row:
        col.plot(D[i])
        i += 1
Example #33
    initial_patch_size = patches.shape
    patches = patches.reshape(-1, patch_size[0] * patch_size[1])

    patches_recto.append(patches)

# Change the size of patches
patches_recto = np.asarray(patches_recto)
patches_recto = patches_recto.reshape(-1, m * m)
# Do the normalisation here
patches_recto -= np.mean(patches_recto, axis=0)  # remove the mean
patches_recto /= np.std(patches_recto, axis=0)  # normalise each patch

dico_recto = MiniBatchDictionaryLearning(
    n_components=100, alpha=0.7,
    n_iter=400)  #TODO:check with different parameters
V_recto = dico_recto.fit(patches_recto).components_
"""
# plot the dictionary
plt.figure(figsize=(8, 6))
for i, comp in enumerate(V_recto[:100]):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,interpolation='nearest')
    plt.xticks(())
    plt.yticks(())
plt.suptitle('Recto dictionary learned from patches')
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
"""

print('Learning the dictionary for verso images...')
patches_verso = []
for pic_set in np.arange(4):
Example #34
File: ffoct.py  Project: gsidier/ffoct
			# Group the patches, reshape the patches as simple vectors,
			# rescale and center the data.
			data = list( numpy.array(patch, numpy.float32).flatten() for patch in patches )
			data = numpy.array(data)
			data /= 256.
			mean = numpy.mean(data, axis = 0)
			data -= mean
			std = numpy.std(data, axis = 0)
			data /= std
	if (not restart) or steps[restart] <= steps['FIT_MODEL']:
		with Timer("Fitting model ..."):
			# Fit the sparse model using Dictionary Learning.
			cols = ceil(sqrt(N_COMP))
			rows = ceil(N_COMP / float(cols))
			model = MiniBatchDictionaryLearning(n_components = N_COMP, alpha = 1)
			fit = model.fit(data)
	if (not restart) or steps[restart] <= steps['DISPLAY_BASIS']:
		with Timer("Display components ..."): 
			# Display the basis components (aka the dictionary).
			pylab.ion()
			pylab.show()
			display(fit)
	if (not restart) or steps[restart] <= steps['COMPUTE_PROJ']:
		with Timer("Compute projection ..."):
			# Project the input patches onto the basis using Orthonormal Matching Pursuit with 2 components.
			model.set_params(transform_algorithm = 'omp', transform_n_nonzero_coefs = N_ATOMS)
			# the intention is simply this:
			#	code = model.transform(data)
			# but we chunk it up and store it in a sparse matrix for efficiency
			code = []
			CHUNK = 1000
Example #35
# In[37]:

data = extract_patches_2d(img / 255, (8, 8), max_patches=1000)
data = data.reshape(data.shape[0], -1)
mean = np.mean(data, axis=0)
data -= mean
data /= np.std(data, axis=0)

# In[59]:

print('Size of patch matrix: ', data.shape)

# In[38]:

dic = MiniBatchDictionaryLearning(n_components=256, alpha=1, n_iter=500)
v = dic.fit(data).components_

# In[39]:

patch_size = (8, 8, 3)

# In[34]:

plt.figure(figsize=(4.2, 4))
for i, comp in enumerate(v[:100]):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patch_size) * 255,
               cmap=plt.cm.gray_r,
               interpolation='nearest')
    plt.xticks(())
    plt.yticks(())
Example #36
                                            delimiter=';')
                    sparseCode = np.loadtxt(
                        filePath + 'sparseCodeT-MODNoisy_' + fileNameSufix,
                        delimiter=';')
                elif title == 'K-HOSVD_javaORMP':
                    dictionary = np.loadtxt(filePath + 'dictK-HOSVDNoisy_' +
                                            fileNameSufix,
                                            delimiter=';')
                    sparseCode = np.loadtxt(
                        filePath + 'sparseCodeK-HOSVDNoisy_' + fileNameSufix,
                        delimiter=';')
                elif title == 'MiniBatchDL_OMP':
                    miniBatch = MiniBatchDictionaryLearning(n_components=K,
                                                            alpha=1,
                                                            n_iter=noIt)
                    dictionary = miniBatch.fit(noisyPatches).components_
                    miniBatch.set_params(
                        transform_algorithm=transform_algorithm, **kwargs)
                    sparseCode = miniBatch.transform(noisyPatches)
                reconstruction = np.dot(sparseCode, dictionary)
                reconstruction += noiseMean
                reconstruction = reconstruction.reshape(
                    len(noisyPatches), *patch_size)
                reconstructions[title][:, width //
                                       2:] = reconstruct_from_patches_2d(
                                           reconstruction,
                                           (height, width // 2))
                print_comparison(reconstructions[title], face, title)
results.close()

# ToDo:
# 1 Dictionary learning ---------------------------------------------------------------------------------

# parameter settings
n_components = 50
alpha = 1
batch_size = 200
n_iter = 25
random_state = 2018

# create the estimator
miniBatchDictLearning = MiniBatchDictionaryLearning(n_components=n_components,
                                                    alpha=alpha,
                                                    batch_size=batch_size,
                                                    n_iter=n_iter,
                                                    random_state=random_state)

# fit the learner
miniBatchDictLearning.fit(X_train.loc[:, :10000])

# apply the learner to the training data
X_train_miniBatchDictLearning = miniBatchDictLearning.transform(X_train)

# convert to a DataFrame
X_train_miniBatchDictLearning = pd.DataFrame(
    data=X_train_miniBatchDictLearning, index=train_index)

# show the scatter plot
scatterPlot(X_train_miniBatchDictLearning, y_train,
            "Mini-batch Dictionary Learning")
Example #38
def dictionary_learning_MHOF(flow_hist_H_400):
    from sklearn.decomposition import MiniBatchDictionaryLearning
    dico = MiniBatchDictionaryLearning(n_components=400, alpha=1, n_iter=500)
    dic = dico.fit(flow_hist_H_400).components_
    #coeffs=dico.transform(flow_hist_H_400)
    return dic
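A hedged usage sketch: flow_hist_H_400 would hold 400 stacked MHOF histograms, one per row; random data with an arbitrary feature dimension stands in here just to show the expected call.

import numpy as np

flow_hist_H_400 = np.random.rand(400, 64)    # 400 samples, illustrative feature size
dic = dictionary_learning_MHOF(flow_hist_H_400)
print(dic.shape)                             # (400, 64): one atom per training sample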
Example #39
# imports needed by this excerpt (ImageListFile2Array is project-local)
from time import time

import numpy as np
from sklearn.decomposition import MiniBatchDictionaryLearning

# number of basis atoms
num_basis = 100

# read the image list
imgArray = ImageListFile2Array('patchlist2.txt')

# initialize the dictionary learner
print 'Learning the dictionary... '
t0 = time()
dico = MiniBatchDictionaryLearning(n_components=num_basis,
                                   alpha=1.0,
                                   transform_algorithm='lasso_lars',
                                   transform_alpha=1.0,
                                   fit_algorithm='lars',
                                   n_iter=500)

# zero mean, unit variance
M = np.mean(imgArray, axis=0)[np.newaxis, :]
whiteArray = imgArray - M
whiteArray /= np.std(whiteArray, axis=0)

# learn the dictionary
V = dico.fit(whiteArray).components_

# processing time
dt = time() - t0
print 'done in %.2fs.' % dt

# save the dictionary
np.save('Dictionaries2.npy', V)
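A hedged follow-up, reloading the saved dictionary to check its shape:

V = np.load('Dictionaries2.npy')
print V.shape   # (num_basis, patch_dim)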
Example #40
    patches_recto.append(patches1)
    patches_recto.append(patches2)

patches_recto = np.reshape(patches_recto, (-1, m * m))
patches_recto -= np.mean(patches_recto, axis=0)  # remove the mean
patches_recto /= np.std(patches_recto, axis=0)  # normalize each patch

dict_components = 100
# recto dictionary
print('Learning the dictionary...')
dico = MiniBatchDictionaryLearning(n_components=dict_components,
                                   alpha=1,
                                   n_iter=400)

# fitting the recto patches
V_recto = dico.fit(patches_recto).components_

#verso dictionary = flipped recto dictionary
# copy before flipping: np.reshape returns a view, so in-place flips would overwrite V_recto
V_verso = np.reshape(V_recto, (dict_components, m, m)).copy()
for i in range(dict_components):
    V_verso[i] = np.fliplr(V_verso[i])
V_verso = np.reshape(V_verso, (dict_components, m * m))


def Dic_proj_recto(data, n_coef, alpha):
    """
    The dictionary projection method
    """
    data = patchify(data, patch_size, step)
    data = data.reshape(-1, patch_size[0] * patch_size[1])
    intercept = np.mean(data, axis=0)
Example #41
        coder = MiniBatchDictionaryLearning(n_components=dict_size,
                                            transform_algorithm='omp',
                                            alpha=threshold,
                                            transform_alpha=threshold,
                                            transform_n_nonzero_coefs=int(
                                                dict_size * args.density),
                                            batch_size=batchSize,
                                            n_iter=iteration,
                                            verbose=True)
    elif args.method == "lasso":
        coder = MiniBatchDictionaryLearning(n_components=dict_size,
                                            transform_algorithm='lasso_lars',
                                            transform_alpha=threshold,
                                            batch_size=batchSize,
                                            n_iter=iteration,
                                            verbose=True)
    elif args.method == "svd":
        coder = TruncatedSVD(n_components=dict_size)

    coder.fit(X_train)
    W_learned = coder.components_
else:
    W_learned = np.load(args.dictInput)['arr_0']
    coder = SparseCoder(dictionary=W_learned,
                        transform_n_nonzero_coefs=int(dict_size *
                                                      args.density))

Y_learned = coder.transform(X_test)
evaluate(X_test, Y_learned, W_learned, iteration, threshold, args.method,
         args.resultFile, args.dictOutput)
Example #42
em.run()

dlog.close(True)
pprint("Done")


# ### Mini-Batch Dictionary Learning
# 
# An alternative, since the EM library gives numerical errors.

# In[20]:

from sklearn.decomposition import MiniBatchDictionaryLearning

mbdic = MiniBatchDictionaryLearning(n_components=30,verbose=True)
mbdic.fit(patches_flat)


# ### Visualize the dictionary atoms

# In[21]:

V = mbdic.components_
plt.figure(figsize=(15,12))
for i,comp in enumerate(V):
    plt.subplot(10,10,i+1)
    plt.imshow(comp.reshape(patchsize).T,origin='lower',interpolation='nearest',aspect='auto',cmap='viridis')
    


# ### Reconstruct some data with the dictionary
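# The reconstruction cell itself is missing from this excerpt; below is a
# minimal hedged sketch of what it could look like, reusing mbdic,
# patches_flat and patchsize from the cells above and assuming numpy is
# imported as np (the OMP settings are illustrative).

mbdic.set_params(transform_algorithm='omp', transform_n_nonzero_coefs=3)
code = mbdic.transform(patches_flat[:8])     # sparse codes for a few patches
recon = np.dot(code, mbdic.components_)      # linear reconstruction from atoms

plt.figure(figsize=(12, 3))
for i in range(8):
    plt.subplot(2, 8, i + 1)
    plt.imshow(patches_flat[i].reshape(patchsize).T, origin='lower', aspect='auto', cmap='viridis')
    plt.subplot(2, 8, 8 + i + 1)
    plt.imshow(recon[i].reshape(patchsize).T, origin='lower', aspect='auto', cmap='viridis')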
Example #43
    test_X_img_patches = image.extract_patches_2d(test_X_img, (patch_w, patch_h), max_patches=n_patches_ea_pic,
                                                  random_state=0)

    test_X_patches[i * n_patches_ea_pic: (i + 1) * n_patches_ea_pic] = test_X_img_patches.reshape(n_patches_ea_pic,
                                                                                                  patch_w * patch_h * 3)

print "test_X_patches", test_X_patches.shape

###############################################################################
# Dictionary Learning
n_components = 576

print("\nSparse Coding Dictionary Learning")
# pca = RandomizedPCA(n_components=n_components).fit(train_X)
dl = MiniBatchDictionaryLearning(n_components)
dl.fit(train_X_patches)

print "X_train.shape", train_X.shape
print "Components shape", dl.components_.shape

# components = dl.components().reshape((n_components, n_features))
components = dl.components_

# Visualizing the components as images
component_titles = ["component %d" % i for i in range(components.shape[0])]
plot_gallery("Visualizing top components", components, component_titles, patch_w, patch_h, n_row=24, n_col=24)
plt.show()

###############################################################################
# Sparse Encoding
print("\nSparse Encoding")
Example #44
class BoVWFeature(TransformerMixin):
    """ 
    Extract BoVW Feature
        
    Parameters
    ----------
    codebook_size : int
      the size of the codebook, default: 512
    
    method : str
      method used to compute the codebook; one of 'sc' (sparse coding) or 'km' (k-means)
      
    """
    def __init__(self, codebook_size=512, method='sc'):
        self.codebook_size = codebook_size
        self.method = method
        self.patch_num = 40000
        self.patch_size = 8
        self.sample = 'random'
        self.feature = 'raw' # raw, surf, hog

    
    def fit(self, X, y=None):
        # compute the codes
        print 'Extracting patches...'
        patchs = []
        num = self.patch_num // X.size
        for x in X:
            img = imread(str(x[0]))
            tmp = extract_patches_2d(img, (self.patch_size,self.patch_size), \
                                     max_patches=num, random_state=np.random.RandomState())
            patchs.append(tmp)
        data = np.vstack(patchs)
        data = data.reshape(data.shape[0], -1)
        
        data -= np.mean(data, axis=0)
        data = data/np.std(data, axis=0)
        
        print 'Learning codebook...'
        if self.method == 'sc':
            self.dico = MiniBatchDictionaryLearning(n_components=self.codebook_size, \
                                               alpha=1, n_iter=100, batch_size =100, verbose=True)
            self.dico.fit(data)
        elif self.method=='km':
            # self.dico = MiniBatchKMeans(n_clusters=self.codebook_size)
            pass
        
        return self
    
    def transform(self, X):
        """         
        Parameters
        ----------
        X : {array-like}, shape = [n_samples, 1]
            Training vectors, where n_samples is the number of samples and
            each entry is an image path.
      
        Returns
        -------

          array-like = [n_samples, codebook_size]
            BoVW feature vector for each image (sum of the patch sparse codes).
        
        """
        print 'Extracting feature...'
        # setting the dictionary
        self.dico.set_params(transform_algorithm='lars')
        results = []
        for sample in X:
            img = imread(str(sample[0]))
            tmp = extract_patches_2d(img, (self.patch_size,self.patch_size), \
                                     max_patches=300, random_state=np.random.RandomState())
            data = tmp.reshape(tmp.shape[0], -1)
            data = data-np.mean(data, axis=0)
            data = data/np.std(data, axis=0)
            code = self.dico.transform(data)
            results.append(code.sum(axis=0))
        return np.vstack(results)
    
    def get_params(self, deep=True):
        return {"codebook_size": self.codebook_size}
# In[19]:

#normalize patches so we can learn the dictionary
norm_data = stacked_patches
norm_data -= np.mean(norm_data, axis=0)
norm_data /= np.std(norm_data, axis=0)

# In[20]:

#Learn dictionary
dictionary = MiniBatchDictionaryLearning(n_components=100,
                                         alpha=1,
                                         batch_size=10,
                                         n_iter=500)
V = dictionary.fit(norm_data).components_  #this is the dictionary

# In[21]:

#pre-process noisy file
distorted_patches = extract_patches_2d(distorted, patch_size)
distorted_stacked_patches = distorted_patches.reshape(
    distorted_patches.shape[0], -1)

#center the data
intercept = np.mean(distorted_stacked_patches, axis=0)
distorted_stacked_patches -= intercept

# In[22]:

#find sparse code of distorted image given the dictionary
                    f'{cfg.save_path}/all_{n_components}_{n_iter}.sklearnmodel'
            ):
                dico = pickle.load(
                    open(
                        f'{cfg.save_path}/all_{n_components}_{n_iter}.sklearnmodel',
                        'rb'))
                print(
                    f'Use hitted {cfg.save_path}/all_{n_components}_{n_iter}.sklearnmodel'
                )
                hit = True
            else:
                dico = DictionaryLearning(n_components=n_components,
                                          n_jobs=-3,
                                          max_iter=n_iter,
                                          verbose=True)
                dico.fit(images)
                print(f'{dico.n_iter_} iters')
            timer.stop(start=' ')
            n_iter_actual = dico.n_iter_

            if cfg.save and not hit:
                np.save(f'{cfg.save_path}/all_{n_components}_{n_iter_actual}',
                        dico.components_)
                pickle.dump(
                    dico,
                    open(
                        f'{cfg.save_path}/all_{n_components}_{n_iter_actual}.sklearnmodel',
                        'wb'))

            # Calculate the mIOU based on cats
            for cat_id in cfg.select_cat:
Example #47

if len(sys.argv) != 4:
    sys.stderr.write('usage: %s data_dir cosim_size dict_size\n' % sys.argv[0])
    sys.exit(1)

data_dir = sys.argv[1]
cosim_size = int(sys.argv[2])
dict_size = int(sys.argv[3])

M = np.memmap(data_dir + '/cosimilarity.dat', dtype='float32', mode='r', shape=(cosim_size, cosim_size))
M = M[:500]

d = MiniBatchDictionaryLearning(dict_size, n_iter=10, batch_size=1000, verbose=True, n_jobs=-1)

d.fit(M)

np.save(data_dir + '/components.bin', d.components_)

component_set = set()
for c in d.components_:
    component_set.add(tuple(c))

ids = open(data_dir + '/dictionary_indexes.txt', 'w')
for (i, row) in enumerate(M):
    if i % 1000 == 0:
        print('checking to see if row %d is a dictionary element' % i)
    if tuple(row) in component_set:
        ids.write(str(i) + '\n')
        component_set.remove(tuple(row))
Example #48
def test_dict_learning_online_estimator_shapes():
    n_components = 5
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, random_state=0)
    dico.fit(X)
    assert_true(dico.components_.shape == (n_components, n_features))
n_appliance = 1
args = model_args.loc[n_appliance - 1, :]
dataset = args[1]
precision = args[2]
denoised = args[4]
ids = args[7].split(',')
datasets_dir = './data/%s.csv'
data = dataset_loader(datasets_dir % dataset,
                      ids,
                      precision=precision,
                      denoised=denoised)
aggregate = data.WHE.tail(36 * 1440)
# train data for dictionary learning
train_data = data.tail(1440)
model = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=30)
model.fit(train_data.T)
comp = model.components_
basics = comp.T

n_components = model.n_components
transforms = model.transform(train_data.T)
activations = transforms.T
reconstruction = np.matmul(basics, activations)
print("Dictionary Learning RMSE for appliance  is %s" %
      (mean_squared_error(reconstruction, train_data)**(.5)))
## SparseCoder for appliance
model = SparseCoder(dictionary=basics.T,
                    positive_code=True,
                    transform_algorithm='lasso_lars')
predicted_activations = model.transform(train_data.T).T
print_appliance_wise_errors(predicted_activations, basics, n_components)
Example #51
# Mini-batch dictionary learning
from sklearn.decomposition import MiniBatchDictionaryLearning

n_components = 28
alpha = 1
batch_size = 200
n_iter = 10
random_state = 2018

miniBatchDictLearning = MiniBatchDictionaryLearning(n_components=n_components,
                                                    alpha=alpha,
                                                    batch_size=batch_size,
                                                    n_iter=n_iter,
                                                    random_state=random_state)

X_train_miniBatchDictLearning = miniBatchDictLearning.fit_transform(X_train)
X_train_miniBatchDictLearning = pd.DataFrame(
    data=X_train_miniBatchDictLearning, index=X_train.index)

scatterPlot(X_train_miniBatchDictLearning, y_train,
            "Mini-batch Dictionary Learning")

# In[57]:

X_train_miniBatchDictLearning_inverse = np.array(
    X_train_miniBatchDictLearning).dot(miniBatchDictLearning.components_)

X_train_miniBatchDictLearning_inverse = pd.DataFrame(
    data=X_train_miniBatchDictLearning_inverse, index=X_train.index)
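A quick hedged sanity check on the inverse transform above, comparing X_train with its reconstruction (numpy already imported as np):

mse = np.mean((np.array(X_train) - np.array(X_train_miniBatchDictLearning_inverse)) ** 2)
print("reconstruction MSE:", mse)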
Example #53
class SparseApproxSpectrum(object):
    """class for 2D patch analysis of audio files
    initialization:
    	patch_size - size of time-frequency 2D patches in spectrogram units (freq,time) [(12,12)]
    	max_samples - if num audio patches exceeds this threshold, randomly sample spectrum [1000000]
        **omp_args - keyword arguments to OrthogonalMatchingPursuit(...) [None]
    """
    def __init__(self, patch_size=(12, 12), max_samples=1000000, **omp_args):
        self.patch_size = patch_size
        self.max_samples = max_samples
        self.omp = OrthogonalMatchingPursuit(**omp_args)
        self.D = None
        self.data = None
        self.components = None
        self.zscore = False
        self.log_amplitude = False

    def _extract_data_patches(self, X, zscore, log_amplitude):
        "utility method for converting spectrogram data to 2D patches "
        self.zscore = zscore
        self.log_amplitude = log_amplitude
        self.X = X
        if self.log_amplitude:
            X = np.log(1 + X)
        data = extract_patches_2d(X, self.patch_size)
        data = data.reshape(data.shape[0], -1)
        if len(data) > self.max_samples:
            data = np.random.permutation(data)[:self.max_samples]
        print data.shape
        if self.zscore:
            self.mn = np.mean(data, axis=0)
            self.std = np.std(data, axis=0)
            data -= self.mn
            data /= self.std
        self.data = data

    def make_gabor_field(self,
                         X,
                         zscore=True,
                         log_amplitude=True,
                         thetas=range(4),
                         sigmas=(1, 3),
                         frequencies=(0.05, 0.25)):
        """Given a spectrogram, prepare 2D patches and Gabor filter bank kernels
        inputs:
           X - spectrogram data (frequency x time)
           zscore - whether to zscore the ensemble of 2D patches [True]
           log_amplitude - whether to apply log(1+X) scaling of spectrogram data [True]
           thetas - list of 2D Gabor filter orientations in units of pi/4. [range(4)]
           sigmas - list of 2D Gabor filter standard deviations in oriented direction [(1,3)]
           frequencies - list of 2D Gabor filter frequencies [(0.05,0.25)]
        outputs:
           self.data - 2D patches of input spectrogram
           self.D.components_ - Gabor dictionary of thetas x sigmas x frequencies atoms
        """
        self._extract_data_patches(X, zscore, log_amplitude)
        self.n_components = len(thetas) * len(sigmas) * len(frequencies)
        self.thetas = thetas
        self.sigmas = sigmas
        self.frequencies = frequencies
        a, b = self.patch_size
        self.kernels = []
        for theta in thetas:
            theta = theta / 4. * np.pi
            for sigma in sigmas:
                for frequency in frequencies:
                    kernel = np.real(
                        gabor_kernel(frequency,
                                     theta=theta,
                                     sigma_x=sigma,
                                     sigma_y=sigma))
                    c, d = kernel.shape
                    if c <= a:
                        z = np.zeros(self.patch_size)
                        z[(a / 2 - c / 2):(a / 2 - c / 2 + c),
                          (b / 2 - d / 2):(b / 2 - d / 2 + d)] = kernel
                    else:
                        z = kernel[(c / 2 - a / 2):(c / 2 - a / 2 + a),
                                   (d / 2 - b / 2):(d / 2 - b / 2 + b)]
                    self.kernels.append(z.flatten())

        class Bunch:
            def __init__(self, **kwds):
                self.__dict__.update(kwds)

        self.D = Bunch(components_=np.vstack(self.kernels))

    def extract_codes(self,
                      X,
                      n_components=16,
                      zscore=True,
                      log_amplitude=True,
                      **mbl_args):
        """Given a spectrogram, learn a dictionary of 2D patch atoms from spectrogram data
        inputs:
            X - spectrogram data (frequency x time)
    	    n_components - how many components to extract [16]
            zscore - whether to zscore the ensemble of 2D patches [True]
            log_amplitude - whether to apply log(1+X) scaling of spectrogram data [True]
            **mbl_args - keyword arguments for MiniBatchDictionaryLearning.fit(...) [None]
        outputs:
            self.data - 2D patches of input spectrogram
            self.D.components_ - dictionary of learned 2D atoms for sparse coding
        """
        self._extract_data_patches(X, zscore, log_amplitude)
        self.n_components = n_components
        self.dico = MiniBatchDictionaryLearning(n_components=self.n_components,
                                                **mbl_args)
        print "Dictionary learning from data..."
        self.D = self.dico.fit(self.data)

    def plot_codes(self, cbar=False, show_axis=False, **kwargs):
        "plot the learned or generated 2D sparse code dictionary"
        N = int(np.ceil(np.sqrt(self.n_components)))
        kwargs.setdefault('cmap', plt.cm.gray_r)
        kwargs.setdefault('origin', 'lower')
        kwargs.setdefault('interpolation', 'nearest')
        for i, comp in enumerate(self.D.components_):
            plt.subplot(N, N, i + 1)
            plt.imshow(comp.reshape(self.patch_size), **kwargs)
            if cbar:
                plt.colorbar()
            if not show_axis:
                plt.axis('off')
            plt.xticks(())
            plt.yticks(())
            plt.title('%d' % (i))
        plt.suptitle('Dictionary of Spectrum Patches\n', fontsize=14)
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def extract_audio_dir_codes(
            self,
            dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav',
            **mbl_args):
        """apply dictionary learning to entire directory of audio files (requires LOTS of RAM)
            inputs:
                **mbl_args - keyword arguments for MiniBatchDictionaryLearning.fit(...) [None]
        """
        flist = glob.glob(dir_expr)
        self.X = np.vstack([
            br.feature_scale(br.LogFrequencySpectrum(f, nbpo=24, nhop=1024).X,
                             normalize=1).T for f in flist
        ]).T
        self.extract_codes(self.X, **mbl_args)

    def _get_approximation_coefs(self, data, components):
        """utility function to fit dictionary components to data
    	inputs:
    		data - spectrogram data (frqeuency x time) [None]
    	  components - the dictionary components to fit to the data [None]
        """
        w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data])
        return w

    def reconstruct_spectrum(self, w=None, randomize=False):
        """reconstruct by fitting current 2D dictionary to self.data 
        inputs:
            w - per-component reconstruction weights [None=calculate weights]
            randomize - randomly permute components after getting weights [False]
        returns:
            self.X_hat - spectral reconstruction of self.data
        """
        data = self.data
        components = self.D.components_
        if w is None:
            self.w = self._get_approximation_coefs(data, components)
            w = self.w
        if randomize:
            components = np.random.permutation(components)
        recon = np.dot(w, components)
        if self.zscore:
            recon = recon * self.std
            recon = recon + self.mn
        recon = recon.reshape(-1, *self.patch_size)
        self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape)
        if self.log_amplitude:
            self.X_hat = np.exp(self.X_hat) - 1.0  # invert log transform

    def reconstruct_individual_spectra(self,
                                       w=None,
                                       randomize=False,
                                       plotting=False,
                                       rectify=True,
                                       **kwargs):
        """fit each dictionary component to self.data
        inputs:
            w - per-component reconstruction weights [None=calculate weights]
            randomize - randomly permute components after getting weights [False]
            plotting - whether to subplot individual spectrum reconstructions [True]
            rectify- remove negative ("dark energy") from individual reconstructions [True]
            **kwargs - keyword arguments for plotting
        returns:
            self.X_hat_l - list of indvidual spectrum reconstructions per dictionary atom
        """
        self.reconstruct_spectrum(w, randomize)
        w, components = self.w, self.D.components_
        self.X_hat_l = []
        for i in range(len(self.w.T)):
            r = np.array(
                (np.matrix(w)[:, i] * np.matrix(components)[i, :])).reshape(
                    -1, *self.patch_size)
            X_hat = reconstruct_from_patches_2d(r, self.X.shape)
            if self.log_amplitude:
                X_hat = np.exp(X_hat) - 1.0
            if rectify:  # half wave rectification
                X_hat[X_hat < 0] = 0
            self.X_hat_l.append(X_hat)
        if plotting:
            self.plot_individual_spectra(**kwargs)

    def plot_individual_spectra(self, **kwargs):
        "plot individual spectrum reconstructions for self.X_hat_l"
        if self.X_hat_l is None: return
        plt.figure()
        rn = int(np.ceil(self.n_components**0.5))
        for k in range(self.n_components):
            plt.subplot(rn, rn, k + 1)
            br.feature_plot(self.X_hat_l[k], nofig=1, **kwargs)
            plt.title('%d' % (k))
        plt.suptitle('Component Reconstructions\n', fontsize=14)
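A hedged usage sketch for the class above: build a fixed Gabor dictionary for a random "spectrogram" and reconstruct with it, with no dictionary learning involved (shapes and the OMP setting are illustrative).

import numpy as np

S = np.abs(np.random.randn(48, 96))
sas = SparseApproxSpectrum(patch_size=(12, 12), n_nonzero_coefs=2)
sas.make_gabor_field(S, thetas=range(4), sigmas=(1, 3), frequencies=(0.05, 0.25))
sas.reconstruct_spectrum()
print(sas.X_hat.shape)   # same shape as S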
Example #54
class Layer(object):

    def __init__(self, hierarchy, depth, patch_size, num_features, num_patches, multiplier):
        """
         * depth - hierarchy level (1, 2, 3, etc.)
         * patch_size - number of pixels representing side of the square patch.
           like, 8 (8x8 patches)
         * num_features - how many components to learn
         * multiplier - num of subpatches we break patch into
           (0 for the first level). if 3, patch will contant 3x3 subpatches.
        """
        self.hierarchy = hierarchy
        self.depth = depth
        self.basement_size = patch_size
        self.num_features = num_features
        self.num_patches = num_patches
        self.multiplier = multiplier
        self.learning = MiniBatchDictionaryLearning(
            n_components=num_features, n_iter=3000, transform_algorithm='lasso_lars', transform_alpha=0.5, n_jobs=2)
        self.ready = False

    def get_data(self, data, max_patches=None):
        """
        Extracts raw data from patches.
        """
        max_patches = max_patches or self.num_patches
        if isinstance(data, np.ndarray):
            # one image
            patches = extract_patches_2d(
                data, (self.basement_size, self.basement_size), max_patches=max_patches)
        else:
            patches = []
            # multiple images
            for i in xrange(max_patches):
                idx = np.random.randint(len(data))  # selecting random image
                dx = dy = self.basement_size
                if data[idx].shape[0] <= dx or data[idx].shape[1] <= dy:
                    continue
                x = np.random.randint(data[idx].shape[0] - dx)
                y = np.random.randint(data[idx].shape[1] - dy)
                patch = data[idx][x: x + dx, y: y + dy]
                patches.append(patch.reshape(-1))
            patches = np.vstack(patches)
            patches = patches.reshape(patches.shape[0], self.basement_size, self.basement_size)
        print 'patches', patches.shape
        # preprocessing.scale expects 2-D input, so flatten, scale, and restore the shape
        patches = preprocessing.scale(patches.reshape(patches.shape[0], -1)).reshape(patches.shape)
        return patches

    def learn(self, data):
        data = data.reshape(data.shape[0], -1)
        self.learning.fit(data)
        self.ready = True

    @property
    def output_size(self):
        return int(np.sqrt(self.num_features))

    @property
    def input_size(self):
        if self.depth == 0:
            return self.basement_size
        else:
            prev_layer = self.hierarchy.layers[self.depth - 1]
            r = prev_layer.output_size * self.multiplier
            return r

    @property
    def features(self):
        return self.learning.components_

    # def get_features(self):
    #     # going from up to down
    #     result = []
    #     layers = self.hierarchy.layers[: self.depth][::-1]
    #     if self.depth == 0:
    #         return self.features

    #     previous_layer = self.hierarchy.layers[self.depth - 1]
    #     for feature in self.features:
    #         multiplier = self.multiplier
    #         feature = feature.reshape(self.multiplier * previous_layer.output_size,
    #                                   self.multiplier * previous_layer.output_size,)
    #         for other_layer in layers:
    #             expressed_feature = np.empty((multiplier * other_layer.input_size,
    #                                           multiplier * other_layer.input_size))
    #             enc_n = other_layer.output_size
    #             n = other_layer.input_size
    #             for dx in range(multiplier):
    #                 for dy in range(multiplier):
    #                     encoded_subfeature = feature[dx * enc_n: (dx + 1) * enc_n,
    #                                                  dy * enc_n: (dy + 1) * enc_n]
    #                     prev_patch = np.dot(encoded_subfeature.reshape(-1), other_layer.features)
    #                     expressed_feature[dx * n: (dx + 1) * n, dy * n: (dy + 1) * n] = prev_patch.reshape(n, n)
    #             feature = expressed_feature
    #             multiplier *= other_layer.multiplier
    #         result.append(expressed_feature.reshape(-1))
    #     result = np.vstack(result)
    #     return result

    def get_features(self):
        # going from down to up. these two methods are look like the same
        if self.depth == 0:
            return self.features
        layers = self.hierarchy.layers[1: self.depth + 1]  # down --> up
        features = self.hierarchy.layers[0].features  # to express upper feature

        for i, layer in enumerate(layers, start=1):
            previous_layer = self.hierarchy.layers[i - 1]
            expressed_features = []
            for feature in layer.features:
                n = previous_layer.output_size
                m = int(np.sqrt(features.shape[1]))
                feature = feature.reshape((layer.input_size, layer.input_size))
                expressed_feature = np.empty((layer.multiplier * m,
                                              layer.multiplier * m))
                for dx in range(layer.multiplier):
                    for dy in range(layer.multiplier):
                        subfeature = feature[dx * n: (dx + 1) * n, dy * n: (dy + 1) * n]
                        # now that's previous_layer's code. replace it with reconstruction
                        expressed_subfeature = np.dot(subfeature.reshape(-1), features)
                        expressed_feature[dx * m: (dx + 1) * m, dy * m: (dy + 1) * m] = expressed_subfeature.reshape((m, m))
                expressed_features.append(expressed_feature.reshape(-1))
            features = np.vstack(expressed_features)
        return features
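A hedged sketch of a bottom-level Layer: hierarchy is only consulted for depth > 0, so None is safe here, and all sizes are illustrative.

import numpy as np

layer = Layer(hierarchy=None, depth=0, patch_size=8, num_features=16,
              num_patches=500, multiplier=0)
img = np.random.rand(64, 64)
patches = layer.get_data(img)      # (500, 8, 8) scaled patches
layer.learn(patches)               # fit the MiniBatchDictionaryLearning model
print(layer.features.shape)        # (16, 64)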
Example #55
    elif title == 'T-MOD javaORMP (Sparsity: 5)':
        continue
    elif title == 'K-HOSVD javaORMP (Sparsity: 5)':
        dictionary = np.loadtxt(
            filePath +
            'dictK-HOSVDNoisy_L=46500_K=100_noIt=50_solver=javaORMP_tnz=5.csv',
            delimiter=';')
        sparseCode = np.loadtxt(
            filePath +
            'sparseCodeK-HOSVDNoisy_L=46500_K=100_noIt=50_solver=javaORMP_tnz=5.csv',
            delimiter=';')
    else:
        miniBatch = MiniBatchDictionaryLearning(n_components=100,
                                                alpha=1,
                                                n_iter=50)
        dictionary = miniBatch.fit(refPatches).components_
        miniBatch.set_params(transform_algorithm=transform_algorithm, **kwargs)
        sparseCode = miniBatch.transform(noisyPatches)
    recPatches = np.dot(sparseCode, dictionary)
    recPatches += noiseMean
    recPatches = recPatches.reshape(len(noisyPatches), *patch_size)

    if transform_algorithm == 'threshold':
        recPatches -= recPatches.min()
        recPatches /= recPatches.max()

    # Plot dictionaries
    # plt.figure(figsize=(4.2, 4))
    # for i, comp in enumerate(dictionary[:100]):
    # 	plt.subplot(10, 10, i + 1)
    # 	plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r, interpolation='nearest')
Example #56
def main(argv=None):
    if argv is None:
        argv=sys.argv

    parser = OptionParser(add_help_option=False)
    parser.add_option("-i", dest="imgJSON")
    parser.add_option("-c", dest="cfgJSON")
    parser.add_option("-a", dest="outAtomFile")
    parser.add_option("-d", dest="outDiffFile")
    parser.add_option("-x", dest="outImagFile")
    parser.add_option("-s", dest="imSlice", type="int")
    parser.add_option("-r", dest="imScale", type="float")
    parser.add_option("-D", dest="dictSiz", type="int", default=5)
    parser.add_option("-k", dest="nearest", type="int", default=5)
    parser.add_option("-h", dest="doHelp", action="store_true", default=False)
    options, _ = parser.parse_args()

    if options.doHelp:
        usage()
        sys.exit(-1)

    imgJSON = options.imgJSON
    cfgJSON = options.cfgJSON

    outAtomFile = options.outAtomFile
    outImagFile = options.outImagFile
    outDiffFile = options.outDiffFile

    imSlice = options.imSlice
    dictSiz = options.dictSiz
    imScale = options.imScale
    nearest = options.nearest

    imData = json.load(open(imgJSON))
    helper = regtools.regtools(cfgJSON)

    groupSet = set()
    groupMap = dict()
    groupLab = []

    # generate numeric labels for each image
    for entry in imData["Data"]: groupSet.add(entry["Group"])
    for cnt, group in enumerate(groupSet): groupMap[group] = cnt
    for entry in imData["Data"]: groupLab.append(groupMap[entry["Group"]])

    imgFiles = []
    [imgFiles.append(str(e["Source"])) for e in imData["Data"]]

    dataList = []
    for i, imFile in enumerate(imgFiles):
        im0 = sitk.ReadImage(imFile)
        im1 = pbmutils.imResize(im0, imScale)
        imSz = sitk.GetArrayFromImage(im1).shape
        helper.infoMsg("Image size : (%d,%d,%d)" % imSz)
        if imSlice is not None:
            sl0 = pbmutils.imSlice(im1, [0, 0, imSlice])
            dataList.append(sl0.ravel())
        else:
            dataList.append(sitk.GetArrayFromImage(im1).ravel())
        helper.infoMsg("Done with image %d!" % i)

    # write raw image data
    if outImagFile is not None:
        tfid = open(outImagFile, 'w')
        np.reshape(np.asmatrix(dataList).T,-1).astype('float32').tofile(tfid)
        tfid.close()

    # build difference images
    diffIm = pbmutils.groupDiff(np.asmatrix(dataList).T, groupLab, nearest)
    helper.infoMsg("Difference image matrix (%d x %d)" % diffIm.shape)

    # write raw difference data
    if outDiffFile is not None:
        outFid = open(outDiffFile, 'w')
        np.reshape(diffIm, -1).ravel().astype('float32').tofile(outFid)
        outFid.close()

    # create the dictionary learner and run (alpha=1)
    lrnObj = MiniBatchDictionaryLearning(dictSiz, 1, verbose=True)
    lrnRes = lrnObj.fit(np.asmatrix(diffIm).T).components_

    # write dictionary atoms
    if outAtomFile is not None:
        outFid = open(outAtomFile, 'w')
        np.reshape(lrnRes.T, -1).ravel().astype('float32').tofile(outFid)
        outFid.close()
Example #57
# Data pre-processing: Normalization
# row_sums = train_X.sum(axis=1).astype(float)
# train_X = np.true_divide(train_X, row_sums[:, np.newaxis])
#
# row_sums = test_X.sum(axis=1).astype(float)
# test_X = np.true_divide(test_X, row_sums[:, np.newaxis])

###############################################################################
# Dictionary Learning
n_components = 100

print("\nSparse Coding Dictionary Learning")
# pca = RandomizedPCA(n_components=n_components).fit(train_X)
dl = MiniBatchDictionaryLearning(n_components, batch_size=50, n_jobs=4, verbose=2)
dl.fit(train_X)

print "X_train.shape", train_X.shape
print "Components shape", dl.components_.shape

# components = dl.components().reshape((n_components, n_features))
components = dl.components_

# Visualizing the components as images
component_titles = ["%d" % i for i in range(components.shape[0])]
plot_gallery("Visualizing top components", components, w, h, n_row=n_components / 10, n_col=10)
plt.show()

###############################################################################
# Sparse Encoding
print("\nSparse Encoding")
print('Extracting reference patches...')
t0 = time()
patch_size = (7, 7)
data = extract_patches_2d(distorted[:, :height // 2], patch_size)
data = data.reshape(data.shape[0], -1)
data -= np.mean(data, axis=0)
data /= np.std(data, axis=0)
print('done in %.2fs.' % (time() - t0))

###############################################################################
# Learn the dictionary from reference patches

print('Learning the dictionary...')
t0 = time()
dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=500)
V = dico.fit(data).components_
dt = time() - t0
print('done in %.2fs.' % dt)

plt.figure(figsize=(4.2, 4))
for i, comp in enumerate(V[:100]):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
               interpolation='nearest')
    plt.xticks(())
    plt.yticks(())
plt.suptitle('Dictionary learned from Lena patches\n' +
             'Train time %.1fs on %d patches' % (dt, len(data)),
             fontsize=16)
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
def imageDenoisingTest01():
	from time import time
	import matplotlib.pyplot as plt
	import numpy as np

	from scipy.misc import lena

	from sklearn.decomposition import MiniBatchDictionaryLearning
	from sklearn.feature_extraction.image import extract_patches_2d
	from sklearn.feature_extraction.image import reconstruct_from_patches_2d

	#Load image and extract patches
	lena = lena() / 256.0




	lena = lena[::2, ::2] + lena[1::2, ::2] + lena[::2, 1::2] + lena[1::2, 1::2]
	lena /= 4.0

	height, width = lena.shape

	#Distort the right half of the image
	print "distorting image"

	distorted = lena.copy()
	distorted[:, height//2:] += 0.075 * np.random.randn(width, height // 2)

	#plt.imshow(distorted[:, :height//2], cmap = plt.cm.gray, interpolation = "nearest")
	#plt.show()

	print "Extacting reference patches"
	#这里是从distorted的左半边抽取patches
	t0 = time()
	patch_size = (7, 7)
	data = extract_patches_2d(distorted[:, :height//2], patch_size)

	# data is a 30500 x 7 x 7 array
	#print data
	#print len(data)
	#print len(data[0][0])

	#plt.imshow(data[0], cmap = plt.cm.gray, interpolation = "nearest")
	#plt.show()

	#print distorted[:, height//2:].shape  # one half is 256 x 128




	# flatten each patch into a 1-D vector, then standardize
	data = data.reshape(data.shape[0], -1)
	data -= np.mean(data, axis = 0)
	data /= np.std(data, axis = 0)

	print 'done in ' + str(time() - t0)


	# Learn the dictionary from reference patches
	print "Learning the dictionary"
	t0 = time()
	# learn the dictionary from the patches
	# create a new model
	dico = MiniBatchDictionaryLearning(n_components = 100, alpha = 1, n_iter = 5000)

	print data.shape  # data is a 30500 x 49 matrix
	V = dico.fit(data).components_

	print V.shape  # V is a 100 x 49 matrix
	dt = time() - t0

	print "done in %.2fs." % dt

	plt.figure(figsize = (4.2, 4))
	for i, comp in enumerate(V[:100]):
		plt.subplot(10, 10, i + 1)
		plt.imshow(comp.reshape(patch_size), cmap = plt.cm.gray_r, interpolation = "nearest")
		plt.xticks(())
		plt.yticks(())

	plt.suptitle("Dictionary learned from lena patches\n" + "Train time %.1fs on %d patches" % (dt, len(data)), fontsize = 16)

	plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

	def show_with_diff(image, reference, title):
		plt.figure(figsize = (5, 3.3))
		plt.subplot(1, 2, 1)
		plt.title('Image')
		plt.imshow(image, vmin = 0, vmax = 1, cmap = plt.cm.gray, interpolation = "nearest")

		plt.xticks(())
		plt.yticks(())
		plt.subplot(1,2,2)

		difference = image - reference

		plt.title("difference (norm: %.2f)" % np.sqrt(np.sum(difference ** 2)))

		plt.imshow(difference, vmin = -0.5, vmax = 0.5, cmap = plt.cm.PuOr, interpolation = "nearest")
		plt.xticks(())
		plt.yticks(())
		plt.suptitle(title, size = 16)

		plt.subplots_adjust(0.02, 0.02, 0.98, 0.79, 0.02, 0.02)


	show_with_diff(distorted, lena, "Distorted Image")




	#plt.show()

	#Extract noisy patches and reconstruct them using the dictionary
	# extract patches from the right half of the image
	print('Extracting noisy patches...')
	t0 = time()
	data = extract_patches_2d(distorted[:, height//2:], patch_size)
	data = data.reshape(data.shape[0], -1)
	intercept = np.mean(data, axis = 0)
	data -= intercept

	print "done in %.2fs. " % (time() - t0)

	transform_algorithms = [('Orthogonal Matching Pursuit\n1 atom', 'omp',
							{'transform_n_nonzero_coefs': 1}),
							('Orthogonal Matching Pursuit\n2 atoms', 'omp',
							{'transform_n_nonzero_coefs': 2}),
							('Least-angle regression\n5 atoms', 'lars',
							{'transform_n_nonzero_coefs': 5}),
							('Thresholding\n alpha = 0.1', 'threshold',
							{'transform_alpha': 0.1})]

	reconstructions = {}
	for title, transform_algorithm, kwargs in transform_algorithms:
		print title + "..."
		reconstructions[title] = lena.copy()
		t0 = time()
		dico.set_params(transform_algorithm = transform_algorithm, **kwargs)
		code = dico.transform(data)  # use the trained model to get the representation coefficients (the sparse code)
		patches = np.dot(code, V)

		if transform_algorithm == "threshold":
			patches -= patches.min()
			patches /= patches.max()

		patches += intercept
		patches = patches.reshape(len(data), *patch_size)

		if transform_algorithm == "threshold":
			patches -= patches.min()
			patches /= patches.max()

		reconstructions[title][:, height // 2:] = reconstruct_from_patches_2d(patches, (width, height // 2))
		dt = time() - t0
		print "done in %.2fs." % dt
		show_with_diff(reconstructions[title], lena, title + '(time: %.1fs)' % dt)

	plt.show()
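A hedged entry point so the demo above runs as a script:

if __name__ == "__main__":
	imageDenoisingTest01()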