Example #1
 def MiniBach_DictionaryLearning(self,N_component):
     MiniDL_calculator = skdecomp.MiniBatchDictionaryLearning(N_component,batch_size = 25)
     self.MiniDLs = MiniDL_calculator.fit(self.vector_centered)
     all_MiniDLs = self.MiniDLs.components_
     pp.save_variable(all_MiniDLs,save_folder+r'\\Dictionary_Learning_Data.pkl')
     print('MiniBatch Dictionary Learning Done, generating graphs')
     self.cell_graph_plot('Dictionary_Learning',all_MiniDLs)
Example #2
 def __init__(self, dataFile, outputFile, size):
     data = np.loadtxt(open(dataFile, "rb"), delimiter=",", skiprows=0)
     dictionary = decomposition.MiniBatchDictionaryLearning(
         n_components=size, alpha=1, n_iter=500).fit(data)
     dictionaryData = pickle.dumps(dictionary)
     f = open(outputFile, "wb")  # binary mode: pickle.dumps returns bytes
     f.write(dictionaryData)
     f.close()
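# Not part of the original example: a minimal sketch (with hypothetical file
# names) of reading the pickled MiniBatchDictionaryLearning model back and
# using it to sparse-code new samples.
import pickle
import numpy as np

with open("dictionary.pkl", "rb") as f:                 # hypothetical path
    dictionary = pickle.loads(f.read())
new_data = np.loadtxt("new_data.csv", delimiter=",")    # hypothetical input file
codes = dictionary.transform(new_data)                  # shape: (n_samples, n_components)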
Example #3
def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
    plt.figure(figsize=(4. * n_col, 2.26 * n_row,))
    plt.suptitle(title, size=16)
    for i, comp in enumerate(images):
        plt.subplot(n_row, n_col, i + 1)
        vmax = max(comp.max(), -comp.min())
        plt.imshow(comp.reshape(image_shape), cmap=cmap,
                   interpolation='nearest',
                   vmin=-vmax, vmax=vmax)
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

estimators = [
    ('Dictionary learning',
        decomposition.MiniBatchDictionaryLearning(n_components=70, alpha=0.1,
                                                  n_iter=50, batch_size=2,
                                                  random_state=rng ),
     True),
    ('Dictionary learning - positive dictionary',
        decomposition.MiniBatchDictionaryLearning(n_components=70, alpha=0.1,
                                                  n_iter=50, batch_size=2,
                                                  random_state=rng,
                                                  positive_dict=True),
     True),
    ('Dictionary learning - positive code',
        decomposition.MiniBatchDictionaryLearning(n_components=70, alpha=0.1,
                                                  n_iter=50, batch_size=2,
                                                  fit_algorithm='cd',
                                                  random_state=rng,
                                                  positive_code=True),
     True),
    ('Dictionary learning - positive dictionary & code',
        decomposition.MiniBatchDictionaryLearning(n_components=70, alpha=0.1,
                                                  n_iter=50, batch_size=2,
                                                  fit_algorithm='cd',
                                                  random_state=rng,
                                                  positive_dict=True,
                                                  positive_code=True),
     True),
]
Example #4
def gen_estimators():
    '''
    List of the different estimators, whether to center and transpose the problem, and whether the transformer uses the clustering API.
    '''
    rng = RandomState(0)
    estimators = [
        ('Eigenfaces - RandomizedPCA',
         decomposition.RandomizedPCA(n_components=n_components,
                                     whiten=True), True),
        ('Non-negative components - NMF tol=1e-4',
         decomposition.NMF(n_components=n_components,
                           init='nndsvda',
                           tol=1e-4,
                           solver='cd'), False),
        ('Non-negative components - NMF tol=1e-6',
         decomposition.NMF(
             n_components=n_components,
             init='nndsvd',
         ), False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True), True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components,
                                          alpha=0.8,
                                          n_iter=100,
                                          batch_size=3,
                                          random_state=rng), True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15,
                                                   alpha=0.1,
                                                   n_iter=50,
                                                   batch_size=3,
                                                   random_state=rng), True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components,
                         tol=1e-3,
                         batch_size=20,
                         max_iter=50,
                         random_state=rng), True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components,
                                      max_iter=2), True),
    ]
    return estimators
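
# Not part of the original example: a hypothetical driver loop showing how the
# (name, estimator, center) tuples are typically consumed. `faces`,
# `faces_centered`, `n_components` and `plot_gallery` are assumed to exist as
# in the other face-decomposition examples on this page.
for name, estimator, center in gen_estimators():
    data = faces_centered if center else faces
    estimator.fit(data)
    if hasattr(estimator, 'cluster_centers_'):
        components_ = estimator.cluster_centers_
    else:
        components_ = estimator.components_
    plot_gallery(name, components_[:n_components])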
Example #5
 def create_estimator(self):
     if self.estimator_name == 'K-means':
         n_components = self.param_dict['n_components']
         ninit = self.param_dict['n_init']
         self.estimator = cluster.KMeans(algorithm='full',
                                         n_clusters=n_components,
                                         n_init=ninit,
                                         n_jobs=self.n_jobs)
         self.estimator_param_string = '_n_components_' + str(
             n_components) + '_'
     elif self.estimator_name == 'EM':
         n_components = self.param_dict['n_components']
         covariance_type = 'full'
         max_iter = self.param_dict['max_iter']
         self.estimator_param_string = '_n_components_' + str(
             n_components) + '_'
         self.estimator = mixture.GaussianMixture(
             n_components=n_components,
             covariance_type=covariance_type,
             max_iter=max_iter)
     elif self.estimator_name == 'PCA':
         n_components = self.param_dict['n_components']
         self.estimator_param_string = '_n_components_' + str(
             n_components) + '_'
         self.estimator = decomposition.PCA(n_components=n_components)
     elif self.estimator_name == 'ICA':
         n_components = self.param_dict['n_components']
         self.estimator_param_string = '_n_components_' + str(
             n_components) + '_'
         self.estimator = decomposition.FastICA(n_components=n_components,
                                                max_iter=1000)
     elif self.estimator_name == 'Random_Projection':
         n_components = self.param_dict['n_components']
         self.estimator_param_string = '_n_components_' + str(
             n_components) + '_'
         self.estimator = random_projection.GaussianRandomProjection(
             n_components=n_components)
     elif self.estimator_name == 'Dictionary_Learning':
         n_components = self.param_dict['n_components']
         self.estimator_param_string = '_n_components_' + str(
             n_components) + '_'
         alpha = self.param_dict['alpha']
         self.estimator = decomposition.MiniBatchDictionaryLearning(
             n_components=n_components, alpha=alpha, batch_size=20)
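
# Not part of the original code: a hypothetical caller. Only the `param_dict`
# keys come from the method above; the object name `runner` stands in for an
# instance of the enclosing class.
import numpy as np

runner.estimator_name = 'Dictionary_Learning'
runner.param_dict = {'n_components': 30, 'alpha': 0.5}
runner.create_estimator()
X = np.random.rand(200, 64)                  # toy data: (n_samples, n_features)
codes = runner.estimator.fit_transform(X)    # sparse codes, one row per sample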
Example #6
nfeat = 15
rpca = decomposition.RandomizedPCA(n_components=nfeat, whiten=True)
rpca.fit(unlagged_stimuli)

unlagged_stimuli = rpca.transform(unlagged_stimuli)

#%%
#sparse pca
spca = decomposition.SparsePCA(n_jobs=-1)
spca.fit(unlagged_stimuli)

unlagged_stimuli = spca.transform(unlagged_stimuli)

#%%
#dictionary minibatch
mbdic = decomposition.MiniBatchDictionaryLearning(n_components=50,
                                                  verbose=True)
mbdic.fit(stimuli_patches)

#%%
#visualize

V = mbdic.components_
plt.figure()
for i, comp in enumerate(V):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patchsize), interpolation='nearest')

#%%
#now construct code representation for stimuli
codes = mbdic.transform(stimuli_patches[:sum(patch_stim_lens[:100]), :])
#how are these patches constructed?
Example #7
def _eval_search_params(params_builder):
    search_params = {}

    for p in params_builder['param_set']:
        search_list = p['sp_list'].strip()
        if search_list == '':
            continue

        param_name = p['sp_name']
        if param_name.lower().endswith(NON_SEARCHABLE):
            print("Warning: `%s` is not eligible for search and was "
                  "omitted!" % param_name)
            continue

        if not search_list.startswith(':'):
            safe_eval = SafeEval(load_scipy=True, load_numpy=True)
            ev = safe_eval(search_list)
            search_params[param_name] = ev
        else:
            # A leading `:` before the search list asks for estimator evaluation.
            safe_eval_es = SafeEval(load_estimators=True)
            search_list = search_list[1:].strip()
            # TODO: maybe add a regular expression check
            ev = safe_eval_es(search_list)
            preprocessings = (
                preprocessing.StandardScaler(), preprocessing.Binarizer(),
                preprocessing.MaxAbsScaler(), preprocessing.Normalizer(),
                preprocessing.MinMaxScaler(),
                preprocessing.PolynomialFeatures(),
                preprocessing.RobustScaler(), feature_selection.SelectKBest(),
                feature_selection.GenericUnivariateSelect(),
                feature_selection.SelectPercentile(),
                feature_selection.SelectFpr(), feature_selection.SelectFdr(),
                feature_selection.SelectFwe(),
                feature_selection.VarianceThreshold(),
                decomposition.FactorAnalysis(random_state=0),
                decomposition.FastICA(random_state=0),
                decomposition.IncrementalPCA(),
                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
                decomposition.LatentDirichletAllocation(random_state=0,
                                                        n_jobs=N_JOBS),
                decomposition.MiniBatchDictionaryLearning(random_state=0,
                                                          n_jobs=N_JOBS),
                decomposition.MiniBatchSparsePCA(random_state=0,
                                                 n_jobs=N_JOBS),
                decomposition.NMF(random_state=0),
                decomposition.PCA(random_state=0),
                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.TruncatedSVD(random_state=0),
                kernel_approximation.Nystroem(random_state=0),
                kernel_approximation.RBFSampler(random_state=0),
                kernel_approximation.AdditiveChi2Sampler(),
                kernel_approximation.SkewedChi2Sampler(random_state=0),
                cluster.FeatureAgglomeration(),
                skrebate.ReliefF(n_jobs=N_JOBS), skrebate.SURF(n_jobs=N_JOBS),
                skrebate.SURFstar(n_jobs=N_JOBS),
                skrebate.MultiSURF(n_jobs=N_JOBS),
                skrebate.MultiSURFstar(n_jobs=N_JOBS),
                imblearn.under_sampling.ClusterCentroids(random_state=0,
                                                         n_jobs=N_JOBS),
                imblearn.under_sampling.CondensedNearestNeighbour(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.EditedNearestNeighbours(random_state=0,
                                                                n_jobs=N_JOBS),
                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.InstanceHardnessThreshold(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NearMiss(random_state=0,
                                                 n_jobs=N_JOBS),
                imblearn.under_sampling.NeighbourhoodCleaningRule(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.OneSidedSelection(random_state=0,
                                                          n_jobs=N_JOBS),
                imblearn.under_sampling.RandomUnderSampler(random_state=0),
                imblearn.under_sampling.TomekLinks(random_state=0,
                                                   n_jobs=N_JOBS),
                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.RandomOverSampler(random_state=0),
                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.BorderlineSMOTE(random_state=0,
                                                       n_jobs=N_JOBS),
                imblearn.over_sampling.SMOTENC(categorical_features=[],
                                               random_state=0,
                                               n_jobs=N_JOBS),
                imblearn.combine.SMOTEENN(random_state=0),
                imblearn.combine.SMOTETomek(random_state=0))
            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessings[0:35])
                elif obj == 'sk_prep_all':  # no KernelCenterer()
                    newlist.extend(preprocessings[0:7])
                elif obj == 'fs_all':
                    newlist.extend(preprocessings[7:14])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessings[14:25])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessings[25:29])
                elif obj == 'reb_all':
                    newlist.extend(preprocessings[30:35])
                elif obj == 'imb_all':
                    newlist.extend(preprocessings[35:54])
                elif type(obj) is int and -1 < obj < len(preprocessings):
                    newlist.append(preprocessings[obj])
                elif hasattr(obj, 'get_params'):  # user uploaded object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    sys.exit("Unsupported estimator type: %r" % (obj))

            search_params[param_name] = newlist

    return search_params
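
# Not part of the original code: a hypothetical `params_builder` showing the
# two search-list syntaxes handled above: a plain literal list evaluated with
# SafeEval, and a ':'-prefixed list whose keywords (e.g. 'decomp_all') expand
# into blocks of preprocessor/decomposition objects.
params_builder = {
    'param_set': [
        {'sp_name': 'estimator__alpha', 'sp_list': '[0.01, 0.1, 1.0]'},
        {'sp_name': 'preprocessing__transformer', 'sp_list': ": ['decomp_all']"},
    ]
}
search_params = _eval_search_params(params_builder)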
                          batch_size=4,
                          n_jobs=-1)
mbsp.fit(X)
#X_transformed = transformer.transform(X)
# X_transformed.shape

# plt.plot(mbsp.components_[0,:]); plt.show()

#%%
X = data_pts_1

from sklearn import decomposition

mbdl = decomposition.MiniBatchDictionaryLearning(n_jobs=-1,
                                                 n_components=20,
                                                 alpha=0.1,
                                                 n_iter=200,
                                                 batch_size=5,
                                                 random_state=0)
mbdl.fit(X)
# plt.plot(mbdl.components_[0,:]); plt.show()
#%%

mbdlp = decomposition.MiniBatchDictionaryLearning(n_jobs=-1,
                                                  n_components=20,
                                                  alpha=1,
                                                  n_iter=100,
                                                  batch_size=5,
                                                  positive_dict=True,
                                                  random_state=0)
mbdlp.fit(X)
                       beta=5.0,
                       tol=5e-3,
                       sparseness='components'), False, False),
    ('Independent components - FastICA',
     decomposition.FastICA(n_components=n_components, whiten=True,
                           max_iter=10), True, True),
    ('Sparse comp. - MiniBatchSparsePCA',
     decomposition.MiniBatchSparsePCA(n_components=n_components,
                                      alpha=1e-3,
                                      n_iter=100,
                                      chunk_size=3,
                                      random_state=rng), True, False),
    ('MiniBatchDictionaryLearning',
     decomposition.MiniBatchDictionaryLearning(n_atoms=15,
                                               alpha=5e-3,
                                               n_iter=50,
                                               chunk_size=3,
                                               random_state=rng), True, False),
    ('Cluster centers - MiniBatchKMeans',
     MiniBatchKMeans(k=n_components,
                     tol=1e-3,
                     chunk_size=20,
                     max_iter=50,
                     random_state=rng), True, False)
]

###############################################################################
# Plot a sample of the input data

plot_gallery("First centered Olivetti faces", faces_centered[:n_components])
def get_search_params(params_builder):
    search_params = {}
    safe_eval = SafeEval(load_scipy=True, load_numpy=True)
    safe_eval_es = SafeEval(load_estimators=True)

    for p in params_builder['param_set']:
        search_p = p['search_param_selector']['search_p']
        if search_p.strip() == '':
            continue
        param_type = p['search_param_selector']['selected_param_type']

        lst = search_p.split(':')
        assert (
            len(lst) == 2
        ), "Error, make sure there is one and only one colon in search parameter input."
        literal = lst[1].strip()
        param_name = lst[0].strip()
        if param_name:
            if param_name.lower() == 'n_jobs':
                sys.exit("Parameter `%s` is invalid for search." % param_name)
            elif not param_name.endswith('-'):
                ev = safe_eval(literal)
                if param_type == 'final_estimator_p':
                    search_params['estimator__' + param_name] = ev
                else:
                    search_params['preprocessing_' + param_type[5:6] + '__' +
                                  param_name] = ev
            else:
                # only for estimator eval; the param name carries a trailing `-`
                # TODO: maybe add a regular expression check
                ev = safe_eval_es(literal)
                for obj in ev:
                    if 'n_jobs' in obj.get_params():
                        obj.set_params(n_jobs=N_JOBS)
                if param_type == 'final_estimator_p':
                    search_params['estimator__' + param_name[:-1]] = ev
                else:
                    search_params['preprocessing_' + param_type[5:6] + '__' +
                                  param_name[:-1]] = ev
        elif param_type != 'final_estimator_p':
            # TODO: regular expression check?
            ev = safe_eval_es(literal)
            preprocessors = [
                preprocessing.StandardScaler(),
                preprocessing.Binarizer(),
                preprocessing.Imputer(),
                preprocessing.MaxAbsScaler(),
                preprocessing.Normalizer(),
                preprocessing.MinMaxScaler(),
                preprocessing.PolynomialFeatures(),
                preprocessing.RobustScaler(),
                feature_selection.SelectKBest(),
                feature_selection.GenericUnivariateSelect(),
                feature_selection.SelectPercentile(),
                feature_selection.SelectFpr(),
                feature_selection.SelectFdr(),
                feature_selection.SelectFwe(),
                feature_selection.VarianceThreshold(),
                decomposition.FactorAnalysis(random_state=0),
                decomposition.FastICA(random_state=0),
                decomposition.IncrementalPCA(),
                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
                decomposition.LatentDirichletAllocation(random_state=0,
                                                        n_jobs=N_JOBS),
                decomposition.MiniBatchDictionaryLearning(random_state=0,
                                                          n_jobs=N_JOBS),
                decomposition.MiniBatchSparsePCA(random_state=0,
                                                 n_jobs=N_JOBS),
                decomposition.NMF(random_state=0),
                decomposition.PCA(random_state=0),
                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.TruncatedSVD(random_state=0),
                kernel_approximation.Nystroem(random_state=0),
                kernel_approximation.RBFSampler(random_state=0),
                kernel_approximation.AdditiveChi2Sampler(),
                kernel_approximation.SkewedChi2Sampler(random_state=0),
                cluster.FeatureAgglomeration(),
                skrebate.ReliefF(n_jobs=N_JOBS),
                skrebate.SURF(n_jobs=N_JOBS),
                skrebate.SURFstar(n_jobs=N_JOBS),
                skrebate.MultiSURF(n_jobs=N_JOBS),
                skrebate.MultiSURFstar(n_jobs=N_JOBS),
                imblearn.under_sampling.ClusterCentroids(random_state=0,
                                                         n_jobs=N_JOBS),
                imblearn.under_sampling.CondensedNearestNeighbour(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.EditedNearestNeighbours(random_state=0,
                                                                n_jobs=N_JOBS),
                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.InstanceHardnessThreshold(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NearMiss(random_state=0,
                                                 n_jobs=N_JOBS),
                imblearn.under_sampling.NeighbourhoodCleaningRule(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.OneSidedSelection(random_state=0,
                                                          n_jobs=N_JOBS),
                imblearn.under_sampling.RandomUnderSampler(random_state=0),
                imblearn.under_sampling.TomekLinks(random_state=0,
                                                   n_jobs=N_JOBS),
                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.RandomOverSampler(random_state=0),
                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.BorderlineSMOTE(random_state=0,
                                                       n_jobs=N_JOBS),
                imblearn.over_sampling.SMOTENC(categorical_features=[],
                                               random_state=0,
                                               n_jobs=N_JOBS),
                imblearn.combine.SMOTEENN(random_state=0),
                imblearn.combine.SMOTETomek(random_state=0)
            ]
            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessors[0:36])
                elif obj == 'sk_prep_all':  # no KernelCenterer()
                    newlist.extend(preprocessors[0:8])
                elif obj == 'fs_all':
                    newlist.extend(preprocessors[8:15])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessors[15:26])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessors[26:30])
                elif obj == 'reb_all':
                    newlist.extend(preprocessors[31:36])
                elif obj == 'imb_all':
                    newlist.extend(preprocessors[36:55])
                elif type(obj) is int and -1 < obj < len(preprocessors):
                    newlist.append(preprocessors[obj])
                elif hasattr(obj, 'get_params'):  # user object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    sys.exit("Unsupported preprocessor type: %r" % (obj))
            search_params['preprocessing_' + param_type[5:6]] = newlist
        else:
            sys.exit("Parameter name of the final estimator can't be skipped!")

    return search_params
Example #11
                       beta=5.0,
                       tol=5e-3,
                       sparseness='components'), False),
    ('Independent components - FastICA',
     decomposition.FastICA(n_components=n_components, whiten=True,
                           max_iter=10), True),
    ('Sparse comp. - MiniBatchSparsePCA',
     decomposition.MiniBatchSparsePCA(n_components=n_components,
                                      alpha=0.8,
                                      n_iter=100,
                                      chunk_size=3,
                                      random_state=rng), True),
    ('MiniBatchDictionaryLearning',
     decomposition.MiniBatchDictionaryLearning(n_components=15,
                                               alpha=0.1,
                                               n_iter=50,
                                               chunk_size=3,
                                               random_state=rng), True),
    ('Cluster centers - MiniBatchKMeans',
     MiniBatchKMeans(n_clusters=n_components,
                     tol=1e-3,
                     batch_size=20,
                     max_iter=50,
                     random_state=rng), True),
    ('Factor Analysis components - FA',
     decomposition.FactorAnalysis(n_components=n_components,
                                  max_iter=2), True),
]

###############################################################################
# Plot a sample of the input data
Example #12
def faces_decomposition():
    import logging
    from numpy.random import RandomState  # random-number generator seed; samples from a Gaussian or other distribution
    import matplotlib.pyplot as plt
    from time import time
    from sklearn.datasets import fetch_olivetti_faces
    from sklearn.cluster import MiniBatchKMeans
    from sklearn import decomposition

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    n_row, n_col = 2, 3
    n_components = n_row * n_col
    image_shape = (64, 64)
    rng = RandomState(0)

    # Load the dataset
    dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
    faces = dataset.data

    n_samples, n_features = faces.shape

    faces_centered = faces - faces.mean(axis=0)

    faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)

    print("dataset consits of %d faces" % n_samples)  #样本个数

    def plot_gallery(title, images, n_col=n_col, n_row=n_row):
        plt.figure(figsize=(2. * n_col, 2.26 * n_row))
        plt.suptitle(title, size=16)
        for i, comp in enumerate(images):
            plt.subplot(n_row, n_col, i + 1)
            vmax = max(comp.max(), -comp.min())
            plt.imshow(comp.reshape(image_shape),
                       cmap=plt.cm.gray,
                       interpolation='nearest',
                       vmin=-vmax,
                       vmax=vmax)
            plt.xticks(())
            plt.yticks(())
        plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

    estimators = [
        ('Eigenfaces - PCA using randomized SVD',
         decomposition.PCA(n_components=n_components,
                           svd_solver='randomized',
                           whiten=True), True),
        ('Non-negative components - NMF',
         decomposition.NMF(n_components=n_components, init='nndsvda',
                           tol=5e-3), False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True), True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components,
                                          alpha=0.8,
                                          n_iter=100,
                                          batch_size=3,
                                          random_state=rng), True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15,
                                                   alpha=0.1,
                                                   n_iter=50,
                                                   batch_size=3,
                                                   random_state=rng), True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components,
                         tol=1e-3,
                         batch_size=20,
                         max_iter=50,
                         random_state=rng), True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components,
                                      max_iter=2), True),
    ]

    # #############################################################################
    # Plot a sample of the input data

    plot_gallery("First centered Olivetti faces",
                 faces_centered[:n_components])

    # #############################################################################
    # Do the estimation and plot it

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_

        # Plot an image representing the pixelwise variance provided by the
        # estimator e.g its noise_variance_ attribute. The Eigenfaces estimator,
        # via the PCA decomposition, also provides a scalar noise_variance_
        # (the mean of pixelwise variance) that cannot be displayed as an image
        # so we skip it.
        if (hasattr(estimator, 'noise_variance_')
                and estimator.noise_variance_.ndim >
                0):  # Skip the Eigenfaces case
            plot_gallery("Pixelwise variance",
                         estimator.noise_variance_.reshape(1, -1),
                         n_col=1,
                         n_row=1)
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()
Example #13
def prepare_dictionaries(Samples,
                         Filter_specs,
                         Dict_alpha=2,
                         Dict_minibatch_size=5,
                         Dict_epochs=1,
                         Dict_jobs=1,
                         Debug_flag=False):
    """
    Prepare dictionary filters for the convolution layers of fluke_net.
    
    Parameters:
    Samples ........... A tensor of all the samples used to make the dictionaries. This
                        tensor should be of format:
                        [Num_samples, Channels, Height, Width]
                        The type of these tensors should be 64 bit floats.
    Filter_specs ...... A list stating how many filters must be made and their specifications.
                        See the relevant argument for details.
    Return values:
    Filters_output .... A list of tensors. Each tensor is a set of all the kernels for a
                        layer. The tensors are of the following format:
                        [Num_of_kernels, Channels, Kernel_height, Kernel_width]
    """

    Filters_output = []

    for Layer, (C, K, M, _) in enumerate(Filter_specs):
        if Debug_flag:
            print('Layer ' + str(Layer) + ' Samples size: ' +
                  str(Samples.shape))

        # Extract patches from all the samples.
        # First unfold returns view of all slices of size 'Kernel_height', unfolding
        # along the height dimension. Second call handles unfolding along the width
        # dimension with slices of size 'Kernel_width'. The end result is a tensor
        # view of the samples cut into the patches needed for training. Both use a
        # stride of 1.
        # This results in a tensor of the following format:
        # [Num_samples, Channels, Num_height_slices, Num_width_slices, K, K]
        Patches = Samples.unfold(2, K, 1).unfold(3, K, 1).cpu()
        if Debug_flag:
            print('Layer ' + str(Layer) + ' Patches view size: ' +
                  str(Patches.shape))

        # Move channels dimension to the front and reshape tensor to following format:
        # [Channel, Num_patches, Patch_data]
        Patches = Patches.permute(1, 0, 2, 3, 4, 5)
        Patches = Patches.reshape(Patches.shape[0], -1, K**2)
        if Debug_flag:
            print('Layer ' + str(Layer) + ' Patches reshaped size: ' +
                  str(Patches.shape))

        # Fit the dictionary and append the atoms to the list of finished kernels
        # We must loop through each channel of the Samples to compute the parts of
        # the kernels that will act on that channel.
        Kernels_list = []
        for Channel in range(Patches.shape[0]):
            # NOTE:
            # The sklearn functions take 'array-like' as parameters for fitting.
            # I am just passing in the tensors and it seems to be working fine,
            # I don't think I need to convert these back to numpy ndarrays before use.

            # Initialize a dictionary for the given channel of the samples.
            Dict = skde.MiniBatchDictionaryLearning(
                n_components=C,  # num of dict elements to extract
                alpha=Dict_alpha,  # sparsity controlling param
                n_iter=Dict_epochs,  # num of epochs per partial_fit()
                batch_size=Dict_minibatch_size,
                transform_algorithm='omp',
                n_jobs=Dict_jobs)  # number of parallel jobs to run

            # Fit the dictionary to the current channel's patches.
            # Fit takes an array parameter of the following format:
            # [Num_samples, Num_features]
            Dict.fit(Patches[Channel, :, :])

            # Reshape the atoms (dictionary components) into kernels and append
            # them to our output list. The components_ array is of format:
            # [Num_components, Num_features]
            Kernels_list.append(Dict.components_.reshape((C, K, K, 1)))

        # Concatenate the list of individual kernels into an ndarray.
        Kernels = np.concatenate(Kernels_list, axis=3)

        # Convert ndarray of kernels into a tensor. Load using the same datatype
        # and device as the Samples these kernels will convolve
        Kernels_tensor = torch.tensor(Kernels,
                                      dtype=Samples.dtype,
                                      device=Samples.device)
        # Must also reorder so that it follows the NCHW format of tensors.
        Kernels_tensor = Kernels_tensor.permute(0, 3, 1, 2)

        if Debug_flag:
            print('Layer ' + str(Layer) + ' Kernels size: ' +
                  str(Kernels_tensor.shape))

        # Create feature map by convolving over Samples with the filters we made
        # from them.
        Convolve_out = torch.nn.functional.conv2d(Samples, Kernels_tensor)

        # Normalize feature map according to activation function (ReLU)
        Convolve_out = torch.nn.functional.relu(Convolve_out)

        # Includes max pooling when specified
        if not M == 0:
            Convolve_out = torch.nn.functional.max_pool2d(Convolve_out, M)

        Samples = Convolve_out

        # Append generated filters to return list.
        Filters_output.append(Kernels_tensor)

    return Filters_output
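
# Not part of the original code: a hypothetical call, assuming the module's
# torch import and that each Filter_specs entry is a
# (num_components, kernel_size, max_pool, _) tuple as unpacked in the loop above.
Samples = torch.randn(8, 1, 64, 64, dtype=torch.float64)    # [N, C, H, W] toy batch
Filter_specs = [(16, 5, 2, None), (32, 3, 0, None)]          # (C, K, M, _)
Filters = prepare_dictionaries(Samples, Filter_specs, Debug_flag=True)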
def plot_faces_decomposition():
    # Display progress logs on stdout
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    n_row, n_col = 2, 3
    n_components = n_row * n_col
    image_shape = (64, 64)
    rng = RandomState(0)

    # #############################################################################
    # Load faces data
    faces, _ = fetch_olivetti_faces(return_X_y=True, shuffle=True,
                                    random_state=rng)
    n_samples, n_features = faces.shape

    # global centering
    faces_centered = faces - faces.mean(axis=0)

    # local centering
    faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)

    print("Dataset consists of %d faces" % n_samples)

    def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
        plt.figure(figsize=(2. * n_col, 2.26 * n_row))
        plt.suptitle(title, size=16)
        for i, comp in enumerate(images):
            plt.subplot(n_row, n_col, i + 1)
            vmax = max(comp.max(), -comp.min())
            plt.imshow(comp.reshape(image_shape), cmap=cmap,
                       interpolation='nearest',
                       vmin=-vmax, vmax=vmax)
            plt.xticks(())
            plt.yticks(())
        plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

    # #############################################################################
    # List of the different estimators, whether to center and transpose the
    # problem, and whether the transformer uses the clustering API.
    estimators = [
        ('Eigenfaces - PCA using randomized SVD',
         decomposition.PCA(n_components=n_components, svd_solver='randomized',
                           whiten=True),
         True),

        ('Non-negative components - NMF',
         decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3),
         False),

        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True),
         True),

        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8,
                                          n_iter=100, batch_size=3,
                                          random_state=rng),
         True),

        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng),
         True),

        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20,
                         max_iter=50, random_state=rng),
         True),

        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components, max_iter=20),
         True),
    ]

    # #############################################################################
    # Plot a sample of the input data

    plot_gallery("First centered Olivetti faces", faces_centered[:n_components])

    # #############################################################################
    # Do the estimation and plot it

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_

        # Plot an image representing the pixelwise variance provided by the
        # estimator e.g its noise_variance_ attribute. The Eigenfaces estimator,
        # via the PCA decomposition, also provides a scalar noise_variance_
        # (the mean of pixelwise variance) that cannot be displayed as an image
        # so we skip it.
        if (hasattr(estimator, 'noise_variance_') and
                estimator.noise_variance_.ndim > 0):  # Skip the Eigenfaces case
            plot_gallery("Pixelwise variance",
                         estimator.noise_variance_.reshape(1, -1), n_col=1,
                         n_row=1)
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()

    # #############################################################################
    # Various positivity constraints applied to dictionary learning.
    estimators = [
        ('Dictionary learning',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng),
         True),
        ('Dictionary learning - positive dictionary',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng,
                                                   positive_dict=True),
         True),
        ('Dictionary learning - positive code',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   fit_algorithm='cd',
                                                   random_state=rng,
                                                   positive_code=True),
         True),
        ('Dictionary learning - positive dictionary & code',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   fit_algorithm='cd',
                                                   random_state=rng,
                                                   positive_dict=True,
                                                   positive_code=True),
         True),
    ]

    # #############################################################################
    # Plot a sample of the input data

    plot_gallery("First centered Olivetti faces", faces_centered[:n_components],
                 cmap=plt.cm.RdBu)

    # #############################################################################
    # Do the estimation and plot it

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        components_ = estimator.components_
        plot_gallery(name, components_[:n_components], cmap=plt.cm.RdBu)

    plt.show()
    plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

# #############################################################################
# List of the different estimators, whether to center and transpose the
# problem, and whether the transformer uses the clustering API.
estimators = [
    ('PCA',
     decomposition.PCA(n_components=n_components),
     True),

    ('FastICA',
     decomposition.FastICA(n_components=n_components),
     True),

    ('MiniBatchDictionaryLearning',
        decomposition.MiniBatchDictionaryLearning(n_components=n_components),
     True)
]


for name, estimator, center in estimators:
    print("Extracting the top %d %s..." % (n_components, name))
    t0 = time()
    data = faces_centered
    trans_data = estimator.fit_transform(data)
    train_time = (time() - t0)
    print("done in %0.3fs" % train_time)
    components_ = estimator.components_
    plot_gallery('%s - Train time %.1fs' % (name, train_time),
                 components_, trans_data)
def main():
    dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
    faces = dataset.data

    n_samples, n_features = faces.shape
    # global centering
    faces_centered = faces - faces.mean(axis=0)
    # local centering
    faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)
    print("Dataset consists of %d faces" % n_samples)

    estimators = [
        ('Eigenfaces - PCA using randomized SVD',
         decomposition.PCA(n_components=n_components,
                           svd_solver='randomized',
                           whiten=True), True),
        ('Non-negative components - NMF',
         decomposition.NMF(n_components=n_components, init='nndsvda',
                           tol=5e-3), False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True), True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components,
                                          alpha=0.8,
                                          n_iter=100,
                                          batch_size=3,
                                          random_state=rng), True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15,
                                                   alpha=0.1,
                                                   n_iter=50,
                                                   batch_size=3,
                                                   random_state=rng), True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components,
                         tol=1e-3,
                         batch_size=20,
                         max_iter=50,
                         random_state=rng), True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components,
                                      max_iter=2), True),
    ]

    plot_gallery("First centered Olivetti faces",
                 faces_centered[:n_components])

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_
        # The scalar noise_variance_ of PCA cannot be shown as an image, so we skip it.
        if (hasattr(estimator, 'noise_variance_')
                and estimator.noise_variance_.ndim >
                0):  # Skip the Eigenfaces case
            plot_gallery("Pixelwise variance",
                         estimator.noise_variance_.reshape(1, -1),
                         n_col=1,
                         n_row=1)
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()
                              random_state=rng)
pca_model.fit(train_data_zeroavg)
train_data_pc = pca_model.transform(
    train_data_zeroavg)  # Compressed PC-space representation of training data

# -----------------------------------------------------------------------------
# Train sparse coding model on MNIST data

dictionary_size = n_pcs * 2  # number of components in dictionary
alpha = .5  # sparseness parameter
n_iter = 500  # number of iterations

# Initialize MNIST sparse coding model
sparse_model = decomposition.MiniBatchDictionaryLearning(
    n_components=dictionary_size,
    alpha=alpha,
    fit_algorithm='cd',
    n_iter=n_iter,
    random_state=rng)

# Fit model
sparse_model.fit(train_data_pc)

components = pca_model.inverse_transform(
    sparse_model.components_)  # get components in pixel space

k_components = 50
inds = sample(range(np.size(components, 0)), k_components)
components_subset = components[inds, :]

plot_components(components_subset, 'coolwarm')
Example #18
def MainInforExtract(dataset, image_shape):
    """
    :param dataset: A numpy array of shape (n_samples, n_features); the input image data must be grayscale.
    :param image_shape: the shape of the input images
    :return: No return
    """
    # Display progress logs on stdout
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    n_row, n_col = 1, 1
    n_components = n_row * n_col

    # #############################################################################
    # Load faces data
    n_samples, n_features = dataset.shape

    # global centering
    data_centered = dataset - dataset.mean(axis=0)

    # local centering
    data_centered = data_centered - data_centered.mean(axis=1).reshape(
        n_samples, -1)

    print("Dataset consists of %d samples" % n_samples)

    def plot_gallery(title, images, n_col=n_col, n_row=n_row):
        plt.figure(figsize=(2. * n_col, 2.26 * n_row))
        plt.suptitle(title, size=16)
        for i, comp in enumerate(images):
            maxV = np.max(comp)
            minV = np.min(comp)
            comp = (comp - minV) / (maxV - minV)
            plt.subplot(n_row, n_col, i + 1)
            plt.imshow(comp.reshape(image_shape))
            plt.xticks(())
            plt.yticks(())
        plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

    # #############################################################################
    # List of the different estimators, whether to center and transpose the
    # problem, and whether the transformer uses the clustering API.
    estimators = [
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=12,
                                                   alpha=0.12,
                                                   random_state=512,
                                                   batch_size=4,
                                                   n_iter=3090), True),
    ]

    # #############################################################################
    # Do the estimation and plot it

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = dataset
        if center:
            data = data_centered
        print("Input data shape is {}".format(data.shape))
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_
        print("Components shape {}".format(components_.shape))

        # Plot an image representing the pixelwise variance provided by the
        # estimator e.g its noise_variance_ attribute. The Eigenfaces estimator,
        # via the PCA decomposition, also provides a scalar noise_variance_
        # (the mean of pixelwise variance) that cannot be displayed as an image
        # so we skip it.

        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()
Example #19
                       whiten=True), True),
    ('Non-negative components - NMF',
     decomposition.NMF(n_components=n_components, init='nndsvda',
                       tol=5e-3), False),
    ('Independent components - FastICA',
     decomposition.FastICA(n_components=n_components, whiten=True), True),
    ('Sparse comp. - MiniBatchSparsePCA',
     decomposition.MiniBatchSparsePCA(n_components=n_components,
                                      alpha=0.8,
                                      n_iter=100,
                                      batch_size=3,
                                      random_state=rng), True),
    ('MiniBatchDictionaryLearning',
     decomposition.MiniBatchDictionaryLearning(n_components=15,
                                               alpha=0.1,
                                               n_iter=50,
                                               batch_size=3,
                                               random_state=rng), True),
    ('Cluster centers - MiniBatchKMeans',
     MiniBatchKMeans(n_clusters=n_components,
                     tol=1e-3,
                     batch_size=20,
                     max_iter=50,
                     random_state=rng), True),
    ('Factor Analysis components - FA',
     decomposition.FactorAnalysis(n_components=n_components,
                                  max_iter=20), True),
]

# #############################################################################
# Plot a sample of the input data
    "Sparse components - MiniBatchSparsePCA",
    batch_pca_estimator.components_[:n_components],
)

# %%
# Dictionary learning
# ^^^^^^^^^^^^^^^^^^^
#
# By default, :class:`MiniBatchDictionaryLearning` divides the data into
# mini-batches and optimizes in an online manner by cycling over the
# mini-batches for the specified number of iterations.

# %%
batch_dict_estimator = decomposition.MiniBatchDictionaryLearning(
    n_components=n_components,
    alpha=0.1,
    n_iter=50,
    batch_size=3,
    random_state=rng)
batch_dict_estimator.fit(faces_centered)
plot_gallery("Dictionary learning",
             batch_dict_estimator.components_[:n_components])
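
# %%
# Not part of the original example: the same online behaviour can be driven
# explicitly through `partial_fit`, one mini-batch at a time. `faces_centered`,
# `n_components`, `rng` and `plot_gallery` are assumed to be defined as in the
# preceding cells.
manual_dict_estimator = decomposition.MiniBatchDictionaryLearning(
    n_components=n_components, alpha=0.1, batch_size=3, random_state=rng)
for start in range(0, faces_centered.shape[0], 3):
    manual_dict_estimator.partial_fit(faces_centered[start:start + 3])
plot_gallery("Dictionary learning (partial_fit)",
             manual_dict_estimator.components_[:n_components])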

# %%
# Cluster centers - MiniBatchKMeans
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# `MiniBatchKMeans` is computationally efficient and implements online
# learning with a `partial_fit` method. That is why it can be beneficial
# to speed up some time-consuming algorithms with `MiniBatchKMeans`.

# %%
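# The MiniBatchKMeans fit itself is not part of this excerpt; below is a
# minimal sketch of the online usage referred to above. It assumes
# MiniBatchKMeans has been imported from sklearn.cluster and that
# `faces_centered`, `n_components`, `rng` and `plot_gallery` are defined as in
# the preceding cells.
kmeans_estimator = MiniBatchKMeans(n_clusters=n_components, tol=1e-3,
                                   batch_size=20, max_iter=50,
                                   random_state=rng)
for start in range(0, faces_centered.shape[0], 20):
    kmeans_estimator.partial_fit(faces_centered[start:start + 20])
plot_gallery("Cluster centers - MiniBatchKMeans",
             kmeans_estimator.cluster_centers_[:n_components])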