Example No. 1
File: util.py Project: ToraxXx/gsdr
def get_olivetti_faces():
    faces = fetch_olivetti_faces()
    
    faces.data = faces.data.astype(np.float32)
    faces.target = faces.target.astype(np.int32)

    return faces.data, faces.target
Example No. 2
def load_data(train_num, train_repeat):
    test_size = (10. - train_num) / 10
    data = fetch_olivetti_faces()
    X = data.images
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=3, stratify=y)
    if train_repeat > 1:
        X_train = X_train.repeat(train_repeat, axis=0)
        y_train = y_train.repeat(train_repeat)
    return X_train, y_train, X_test, y_test
Example No. 3
def main():
    nComponents = 50
    # Import a dataset for testing
    Faces = data.fetch_olivetti_faces()
    Images = Faces.images
    trainData = Images[:100,:,:]
    testData = Images[100:,:,:]
    # Produce a low dimensional representation
    lowDimTrainData, lowDimTestData = reduceDim( trainData, testData, \
                                                nComponents )
def data_processing_olivetti():
    """
    Python function for importing the Olivetti data set.
    """
    dataset = fetch_olivetti_faces()
    faces = dataset.data
    n_samples, n_features = faces.shape
    class_indices = dataset['target']

    train_set = []
    train_class_indices = []
    train_batches = []
    test_set = []
    test_class_indices = []
    test_batches = []

    curr_idx_count = 0
    batch_count_train = 0
    batch_count_test = 0
    for i in range(len(class_indices)):
        if curr_idx_count <= 6:
            train_set.append(faces[i].reshape((1,len(faces[i]))))
            train_class_indices.append(array([class_indices[i]]))
            train_batches.append(batch_count_train)
            batch_count_train += 1
        elif curr_idx_count <=9:
            test_set.append(faces[i].reshape((1,len(faces[i]))))
            test_class_indices.append(array([class_indices[i]]))
            test_batches.append(batch_count_test)
            batch_count_test += 1
        if curr_idx_count == 9:
            curr_idx_count = -1

        curr_idx_count += 1



    train_path = "output/train/bag_of_words"
    os.makedirs(train_path)
    m.dump(array(train_batches),open(os.path.join(train_path,"batches.p"),"wb"))
    for i in range(len(train_set)):
        m.dump(train_set[i],open(os.path.join(train_path,"bow_batch_"+str(train_batches[i]))+".p","wb"))
        m.dump(train_class_indices[i],open(os.path.join(train_path,"class_indices_batch_"+str(train_batches[i]))+".p","wb"))


    test_path = "output/test/bag_of_words"
    os.makedirs(test_path)
    m.dump(array(test_batches),open(os.path.join(test_path,"batches.p"),"wb"))
    for i in range(len(test_set)):
        m.dump(test_set[i],open(os.path.join(test_path,"bow_batch_"+str(test_batches[i]))+".p","wb"))
        m.dump(test_class_indices[i],open(os.path.join(test_path,"class_indices_batch_"+str(test_batches[i]))+".p","wb"))
def get_data(dataset_name):
    print("Getting dataset: %s" % dataset_name)

    if dataset_name == 'lfw_people':
        X = fetch_lfw_people().data
    elif dataset_name == '20newsgroups':
        X = fetch_20newsgroups_vectorized().data[:, :100000]
    elif dataset_name == 'olivetti_faces':
        X = fetch_olivetti_faces().data
    elif dataset_name == 'rcv1':
        X = fetch_rcv1().data
    elif dataset_name == 'CIFAR':
        if handle_missing_dataset(CIFAR_FOLDER) == "skip":
            return
        X1 = [unpickle("%sdata_batch_%d" % (CIFAR_FOLDER, i + 1))
              for i in range(5)]
        X = np.vstack(X1)
        del X1
    elif dataset_name == 'SVHN':
        if handle_missing_dataset(SVHN_FOLDER) == 0:
            return
        X1 = sp.io.loadmat("%strain_32x32.mat" % SVHN_FOLDER)['X']
        X2 = [X1[:, :, :, i].reshape(32 * 32 * 3) for i in range(X1.shape[3])]
        X = np.vstack(X2)
        del X1
        del X2
    elif dataset_name == 'low rank matrix':
        X = make_low_rank_matrix(n_samples=500, n_features=np.int(1e4),
                                 effective_rank=100, tail_strength=.5,
                                 random_state=random_state)
    elif dataset_name == 'uncorrelated matrix':
        X, _ = make_sparse_uncorrelated(n_samples=500, n_features=10000,
                                        random_state=random_state)
    elif dataset_name == 'big sparse matrix':
        sparsity = np.int(1e6)
        size = np.int(1e6)
        small_size = np.int(1e4)
        data = np.random.normal(0, 1, np.int(sparsity/10))
        data = np.repeat(data, 10)
        row = np.random.uniform(0, small_size, sparsity)
        col = np.random.uniform(0, small_size, sparsity)
        X = sp.sparse.csr_matrix((data, (row, col)), shape=(size, small_size))
        del data
        del row
        del col
    else:
        X = fetch_mldata(dataset_name).data
    return X
def task4():
	data = fetch_olivetti_faces(shuffle=True, random_state=0).data
	image_shape = (64, 64)
	model = RandomizedPCA(n_components=10)
	model.fit(data)
	data_new = model.transform(data)
	mean_components = [data_new[:, i].mean() for i in xrange(data_new.shape[1])]
	influence = np.zeros((data_new.shape[0], data_new.shape[1]))
	for i in xrange(data_new.shape[0]):
		for j in xrange(data_new.shape[1]):
			influence[i, j] = cos(data_new[i, :], mean_components, np.abs(data_new[i, j]), mean_components[j])
	res = []
	for i in xrange(influence.shape[1]):
		res.append(np.argmax(influence[:, i]))
	print res
	write_answer_4(res)
Example No. 7
    def __init__(self, batch_size, max_patches=50, patch_size=(20, 20), images_num=None, rng=None):
        from sklearn import datasets as sklearn_datasets
        from sklearn.feature_extraction.image import extract_patches_2d

        self._train_batch_size = batch_size
        self._test_batch_size = batch_size

        rng = rng if not rng is None else np.random.RandomState(12)

        faces = sklearn_datasets.fetch_olivetti_faces()
        images_num = images_num if not images_num is None else faces.images.shape[0]

        x_v = np.zeros((max_patches * images_num, patch_size[0]*patch_size[1]))
        classes = np.zeros((max_patches * images_num,))
        
        for img_id, img in enumerate(faces.images):
            if img_id >= images_num:
                break

            patches_id = ((img_id * max_patches),((img_id+1) * max_patches))
            
            x_v[patches_id[0]:patches_id[1], :] = extract_patches_2d(
                img, 
                patch_size, 
                max_patches=max_patches, 
                random_state=rng
            ).reshape((max_patches, patch_size[0]*patch_size[1]))
            
            classes[patches_id[0]:patches_id[1]] = faces.target[img_id]
        
        y_v = one_hot_encode(classes)
        
        test_prop = x_v.shape[0]/5

        self._xt_v = x_v
        self._yt_v = y_v

        self._x_v = x_v
        self._y_v = y_v
        self._i = 0

        self._x_v -= np.mean(self._x_v, axis=0)
        self._x_v /= np.std(self._x_v, axis=0)
        self._x_v *= 0.1
 def init_features(self):
     if self.feature_coef_ is None:
         self.feature_coef_ = self.redis.get("feature_coef")
     if self.feature_coef_ is None:
         pca = PCA(self._n_components)
         test_faces = fetch_olivetti_faces()
         features = np.array(pca.fit_transform(test_faces.data),
                             dtype=np.float32)
         self.redis.set("name:0", "olivetti_faces")
         self.redis.set("name_id:olivetti_faces", 0)
         feature_coef = np.array(pca.components_.T, np.float64)
         dim1, dim2 = feature_coef.shape
         self.redis.hmset("feature_coef", 
                 {"dim1":dim1, "dim2":dim2,
                  "data":feature_coef.tostring()})
         test_features = [f.tostring() for f in features]
         self.redis.rpush("features", *test_features)
         test_face_data = [np.array(f, dtype=np.float32).tostring() for f in test_faces.data]
         self.redis.rpush("faces", *test_face_data)
         for i in xrange(len(test_faces.data)):
             self.redis.hmset("picture:%d" % (i),
                              {"name_id":0, "pic_path":DUMMY_PATH})
         self.redis.set("last_pic_id", len(test_faces.data) - 1)
def OnlineLearningTest01():
	import time

	import matplotlib.pyplot as plt
	import numpy as np

	from sklearn import datasets
	from sklearn.cluster import MiniBatchKMeans
	from sklearn.feature_extraction.image import extract_patches_2d

	faces = datasets.fetch_olivetti_faces()

	print "Learning the dictionary..."
	rng = np.random.RandomState(0)

	kmeans = MiniBatchKMeans(n_clusters = 81, random_state = rng, verbose = True)
	patch_size = (20, 20)

	buffer = []
	index = 1
	t0 = time.time()

	#Online Learning
	index = 0

	for _ in range(6):
		for img in faces.images:
			data = extract_patches_2d(img, patch_size, max_patches = 50, random_state = rng)
			data = np.reshape(data, (len(data), -1))

			buffer.append(data)
			index += 1
			if index % 10 == 0:
				data = np.concatenate(buffer, axis = 0) # merge the buffered patch arrays into one matrix

				# standardize the patches before the partial fit
				data -= np.mean(data, axis = 0)
				data /= np.std(data, axis = 0)
				kmeans.partial_fit(data) 	# each call to partial_fit learns from one mini-batch
				buffer = []

			if index % 100 == 0:
				print "Partial fit of %4i out of %i" % (index, 6 * len(faces.images))


	dt = time.time() - t0
	print "done in %.2fs. " % dt

	#plot result
	plt.figure(figsize = (4.2, 4))
	for i, patch in enumerate(kmeans.cluster_centers_):
		plt.subplot(9,9, i + 1)
		plt.imshow(patch.reshape(patch_size), cmap = plt.cm.gray, interpolation = "nearest")

		plt.xticks(())
		plt.yticks(())


	plt.suptitle('Patches of faces\nTrain time %.1fs on %d patches' % (dt, 6 * len(faces.images)), fontsize = 16)
	plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

	plt.show()
from numpy.random import RandomState
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_olivetti_faces
from sklearn import decomposition

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.cross_validation import ShuffleSplit
# -- Prepare data and define utility functions ---------------------------------

image_shape = (64, 64)
rng = RandomState(0)

# Load faces data
dataset = fetch_olivetti_faces(data_home='/tmp/',shuffle=True, random_state=rng)
faces = dataset.data


n_samples, n_features = faces.shape

# global centering
faces_centered = faces - faces.mean(axis=0, dtype=np.float64)

print "Dataset consists of %d faces" % n_samples
print "********************************"

def plot_gallery(title, images,n_col,n_row):
    plt.figure(figsize=(2. * n_col, 2.26 * n_row))
    plt.suptitle(title, size=16)
    for i, comp in enumerate(images):
Example No. 11
def get_olive():
	olive = datasets.fetch_olivetti_faces()
	return olive.data, olive.target
Example No. 12
from sklearn.datasets import fetch_olivetti_faces
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt
data, target = fetch_olivetti_faces(return_X_y=True)

extractor = PCA(n_components=0.7)
per_face_features = extractor.fit_transform(data, target)

basis = extractor.components_
mean_face = extractor.mean_
rank = extractor.n_components_
vector_length = np.linalg.norm(basis, axis=1, ord=2)
print("the length of each vector in the basis is:", vector_length)
covariance_matrix = np.zeros(shape=(rank, rank))
for i in range(covariance_matrix.shape[0]):
    for j in range(covariance_matrix.shape[1]):
        covariance_matrix[i, j] = np.dot(basis[i], basis[j])
print("the inner product of each vector with another one in the basis is",
      covariance_matrix)

# To verify that each value in the coordinate vector equals the inner product of the basis vector
# and (X - X_mean), we test only one example.

difference_vector = [
    per_face_features[0, i] - np.dot((data[0] - mean_face), basis[i])
    for i in range(rank)
]
print(
    "so the difference in result between this formula and the standard one in PCA is:",
    difference_vector)
Example No. 13
import matplotlib.pyplot as plt
from sklearn import decomposition
from sklearn.datasets import fetch_olivetti_faces
from numpy.random import RandomState

n_row, n_col = 2, 3
n_components = n_row * n_col
image_shape = (64, 64)
dataset = fetch_olivetti_faces(shuffle=True, random_state=RandomState(0))

faces = dataset.data

def plot_gallery(title, images, n_col=n_col, n_row=n_row):
    plt.figure(figsize=(2. * n_col, 2.26 * n_row))
    plt.suptitle(title, size=16)

    for i, comp in enumerate(images):
        plt.subplot(n_row, n_col, i+1)
        vmax = max(comp.max(),-comp.min())

        plt.imshow(comp.reshape(image_shape),cmap=plt.cm.gray,interpolation='nearest',vmin=-vmax,vmax=vmax)
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(0.01,0.05,0.99,0.93,0.04,0.)

estimators = [
    ('Eigenfaces - PCA using randomized SVD',
     decomposition.PCA(n_components=6,whiten=True)),
    ('Non-negative components - NMF',
     decomposition.NMF(n_components=6,init='nndsvda',
                       tol=5e-3))]
Example No. 14
File: download_data
"""
Run this script to make sure data is cached in the appropriate
place on your computer.

The data are only a few megabytes, but conference wireless is
often not very reliable...
"""
import os
import sys
from sklearn import datasets

#------------------------------------------------------------
# Faces data: this will be stored in the scikit_learn_data
#             sub-directory of your home folder
faces = datasets.fetch_olivetti_faces()
print "Successfully fetched olivetti faces data"

#------------------------------------------------------------
# SDSS galaxy data: this will be stored in notebooks/datasets/data
sys.path.append(os.path.abspath('notebooks'))
from datasets import fetch_sdss_galaxy_mags
colors = fetch_sdss_galaxy_mags()
print "Successfully fetched SDSS galaxy data"


#------------------------------------------------------------
# SDSS filters & vega spectrum: stored in notebooks/figures/downloads
from figures.sdss_filters import fetch_filter, fetch_vega_spectrum
spectrum = fetch_vega_spectrum()
print "Successfully fetched vega spectrum"
Example No. 15

#### Template-3 | FACE CLASSIFICATION #########################################
###############################################################################

import sklearn as sk
from scipy.stats import sem
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.svm import SVC
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import cross_val_score, KFold

#### Get Data
faces = fetch_olivetti_faces() ## 400 images of 40 people
print(faces.DESCR)

#### Data exploration
print("faces dataset:",faces.keys())
print("images data dim:",faces.images.shape)
print("numerical data dim:",faces.data.shape)
print("Labels:",faces.target.shape)

#### Data pre-processing - data is already between 0 and 1, so no scaling/transformation is required
print(np.max(faces.data))
print(np.min(faces.data))
print(np.mean(faces.data))

#### Print images
def print_faces(images, target, top_n):
import numpy as np
import matplotlib.pylab as plt, pylab
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.datasets import fetch_olivetti_faces 
faces = fetch_olivetti_faces().data
print(faces.shape) # there are 400 faces each of them is of 64x64=4096 pixels
fig = plt.figure(figsize=(5,5)) 
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05) 
# plot 25 random faces
j = 1
np.random.seed(0)
for i in np.random.choice(range(faces.shape[0]), 25): 
    ax = fig.add_subplot(5, 5, j, xticks=[], yticks=[]) 
    ax.imshow(np.reshape(faces[i,:],(64,64)), cmap=plt.cm.bone, interpolation='nearest') 
    j += 1
#plt.show()

n_comp =64
pipeline = Pipeline([('scaling', StandardScaler()), ('pca', PCA(n_components=n_comp))])
faces_proj = pipeline.fit_transform(faces)
print(faces_proj.shape)
# (400, 64)
mean_face = np.reshape(pipeline.named_steps['scaling'].mean_, (64,64))
sd_face = np.reshape(np.sqrt(pipeline.named_steps['scaling'].var_), (64,64))
pylab.figure(figsize=(8, 6))
pylab.plot(np.cumsum(pipeline.named_steps['pca'].explained_variance_ratio_), linewidth=2)
pylab.grid(), pylab.axis('tight'), pylab.xlabel('n_components'), pylab.ylabel('cumulative explained_variance_ratio_')
pylab.show()
pylab.figure(figsize=(10,5))
Example No. 17
from sklearn.datasets import fetch_olivetti_faces
from sklearn.datasets import fetch_lfw_people
from sklearn.datasets import get_data_home


if __name__ == "__main__":
    fetch_olivetti_faces()

    print("Loading Labeled Faces Data (~200MB)")
    fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    print("=> Success!")
    print("Data saved in %s" % get_data_home())
Example No. 18
from sklearn.datasets import fetch_olivetti_faces

OUTPUT_BASE_DIR = "/neurospin/brainomics/2014_pca_struct/Olivetti_faces"
if not os.path.exists(OUTPUT_BASE_DIR):
    os.makedirs(OUTPUT_BASE_DIR)

OUTPUT_DATASET_FILE = os.path.join(OUTPUT_BASE_DIR, "X.npy")
OUTPUT_TARGET_FILE = os.path.join(OUTPUT_BASE_DIR, "y.npy")
OUTPUT_VAR_FILE = os.path.join(OUTPUT_BASE_DIR, "pixel_var.png")
OUTPUT_IMAGE_FILE = os.path.join(OUTPUT_BASE_DIR, "example.png")

IM_SHAPE = (64, 64)

###############################################################################
# Load faces data
dataset = fetch_olivetti_faces()
faces = dataset.data

n, p = shape = faces.shape

# global centering
faces_centered_global = faces - faces.mean(axis=0)

# local centering
local_centering = faces_centered_global.mean(axis=1).reshape(n, -1)
faces_centered_local = faces_centered_global - local_centering

print("Dataset shape: {s}".format(s=shape))

# Load ground truth (useful for cross validation)
y = dataset.target
Example No. 19

# In[10]:


ax=plt.axes()
ax.plot(np.random.rand(50))
ax.yaxis.set_major_locator(plt.NullLocator())
ax.xaxis.set_major_formatter(plt.NullFormatter())


# In[11]:


from sklearn.datasets import fetch_olivetti_faces
faces=fetch_olivetti_faces().images
fig,ax=plt.subplots(5,5,figsize=(5,5))
fig.subplots_adjust(hspace=0,wspace=0)
for i in range(5):
    for j in range(5):
        ax[i,j].xaxis.set_major_locator(plt.NullLocator())
        ax[i,j].yaxis.set_major_locator(plt.NullLocator())
        ax[i,j].imshow(faces[10*i+j],cmap='bone')


# In[18]:


from matplotlib.ticker import MultipleLocator, FormatStrFormatter
t=np.linspace(0,100,100)
s=9.8*np.power(t,2)/2
Example No. 20
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import numpy as np

# Next, we will retrieve the Olivetti faces dataset.

# In[2]:

from sklearn.datasets import fetch_olivetti_faces, fetch_lfw_people
from ipywidgets import interact
get_ipython().run_line_magic('matplotlib', 'inline')
image_shape = (64, 64)
# Load faces data
dataset = fetch_olivetti_faces('./')
faces = dataset.data

# ### Advice for testing numerical algorithms
# Before we begin this week's assignment, there is some advice we would like to give for writing functions that work with numerical data. It is useful for finding bugs in your implementation.
#
# Testing machine learning algorithms (or numerical algorithms in general)
# is sometimes really hard as it depends on the dataset
# to produce an answer, and you will never be able to test your algorithm on all the datasets
# we have in the world. Nevertheless, we have some tips for you to help you identify bugs in
# your implementations.
#
# #### 1. Test on small dataset
# Test your algorithms on small datasets: a dataset of size 1 or 2 will sometimes suffice. This
# is useful because you can (if necessary) compute the answers by hand and compare them with
# the answers produced by the computer program you wrote. In fact, these small datasets can even have special numbers,
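#
# A minimal illustration of the advice above (not part of the original notebook): check a
# numerical routine on a dataset so small that the expected answer can be computed by hand.
import numpy as np

def normalize(X):
    """Center each column of X and scale it to unit variance."""
    return (X - X.mean(axis=0)) / X.std(axis=0)

# Two 1-D samples: the mean is 2 and the standard deviation is 1,
# so the normalized values must be exactly -1 and +1.
X_tiny = np.array([[1.0], [3.0]])
np.testing.assert_allclose(normalize(X_tiny), [[-1.0], [1.0]])
print("normalize() matches the hand-computed answer")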
Example No. 21
def faces_decomposition():
    import logging
    from numpy.random import RandomState  # random number generator seed; samples are drawn from a Gaussian or other distribution
    import matplotlib.pyplot as plt
    from time import time
    from sklearn.datasets import fetch_olivetti_faces
    from sklearn.cluster import MiniBatchKMeans
    from sklearn import decomposition

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    n_row, n_col = 2, 3
    n_components = n_row * n_col
    image_shape = (64, 64)
    rng = RandomState(0)

    # load the dataset
    dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
    faces = dataset.data

    n_samples, n_features = faces.shape

    faces_centered = faces - faces.mean(axis=0)

    faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)

    print("dataset consits of %d faces" % n_samples)  #样本个数

    def plot_gallery(title, images, n_col=n_col, n_row=n_row):
        plt.figure(figsize=(2. * n_col, 2.26 * n_row))
        plt.suptitle(title, size=16)
        for i, comp in enumerate(images):
            plt.subplot(n_row, n_col, i + 1)
            vmax = max(comp.max(), -comp.min())
            plt.imshow(comp.reshape(image_shape),
                       cmap=plt.cm.gray,
                       interpolation='nearest',
                       vmin=-vmax,
                       vmax=vmax)
            plt.xticks(())
            plt.yticks(())
        plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

    estimators = [
        ('Eigenfaces - PCA using randomized SVD',
         decomposition.PCA(n_components=n_components,
                           svd_solver='randomized',
                           whiten=True), True),
        ('Non-negative components - NMF',
         decomposition.NMF(n_components=n_components, init='nndsvda',
                           tol=5e-3), False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True), True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components,
                                          alpha=0.8,
                                          n_iter=100,
                                          batch_size=3,
                                          random_state=rng), True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15,
                                                   alpha=0.1,
                                                   n_iter=50,
                                                   batch_size=3,
                                                   random_state=rng), True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components,
                         tol=1e-3,
                         batch_size=20,
                         max_iter=50,
                         random_state=rng), True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components,
                                      max_iter=2), True),
    ]

    # #############################################################################
    # Plot a sample of the input data

    plot_gallery("First centered Olivetti faces",
                 faces_centered[:n_components])

    # #############################################################################
    # Do the estimation and plot it

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_

        # Plot an image representing the pixelwise variance provided by the
        # estimator, e.g. its noise_variance_ attribute. The Eigenfaces estimator,
        # via the PCA decomposition, also provides a scalar noise_variance_
        # (the mean of pixelwise variance) that cannot be displayed as an image
        # so we skip it.
        if (hasattr(estimator, 'noise_variance_')
                and estimator.noise_variance_.ndim >
                0):  # Skip the Eigenfaces case
            plot_gallery("Pixelwise variance",
                         estimator.noise_variance_.reshape(1, -1),
                         n_col=1,
                         n_row=1)
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()
Example No. 22
def load_data(type=0, train_kfold=0):
    #0:yale,1:ar,2:orl,3:olivetti,4:imm
    if int(type) == 0:
        for j in range(15):
            for i in range(11):
                filename = 'C:\\Users\\hasee\\Desktop\\yale\\%d\\s%d.bmp' % (
                    j + 1, i + 1)
                # print filename
                if i == 0 and j == 0:
                    x = ImageToMatrix(filename).ravel()
                else:
                    x = np.concatenate((x, ImageToMatrix(filename).ravel()),
                                       axis=0)
        y = np.ones(165)
        for i in range(15):
            for j in range(11):
                y[i * 11 + j] = i + 1

    if int(type) == 1:
        import os
        file = os.listdir('C:\\Users\\hasee\\Desktop\\AR\\')
        k = 0
        for i in file:
            k += 1
            filename = 'C:\\Users\\hasee\\Desktop\\AR\\' + i
            if k == 1:
                x = ImageToMatrix(filename).ravel()
            else:
                x = np.concatenate((x, ImageToMatrix(filename).ravel()),
                                   axis=0)

        y = np.ones(1680)
        for i in range(120):
            for j in range(14):
                y[i * 14 + j] = i + 1

    if int(type) == 2:
        import os
        file = os.listdir('C:\\Users\\hasee\\Desktop\\ORL\\')
        k = 0
        for i in file:
            filename = 'C:\\Users\\hasee\\Desktop\\ORL\\' + i
            file2 = os.listdir(filename)
            for j in file2:
                if j.split(".")[1] == 'bmp':
                    k += 1
                    filename3 = filename + "\\" + j
                    if k == 1:
                        x = ImageToMatrix(filename3).ravel()
                    else:
                        x = np.concatenate(
                            (x, ImageToMatrix(filename3).ravel()), axis=0)
                else:
                    pass
        y = np.zeros(400)
        for i in range(40):
            for j in range(10):
                y[i * 10 + j] = i + 1
    if int(type) == 3:
        from sklearn.datasets import fetch_olivetti_faces
        data = fetch_olivetti_faces()
        x = data.images.reshape((len(data.images), -1))
        y = data.target
    if int(type) == 4:
        import os
        k = 0
        for i in range(240):
            filename = 'C:\\Users\\hasee\\Desktop\\crop2\\%d.jpg' % i
            k += 1
            if k == 1:
                x = ImageToMatrix(filename).ravel()
            else:
                x = np.concatenate((x, ImageToMatrix(filename).ravel()),
                                   axis=0)
        y = np.zeros(240)
        for i in range(40):
            for j in range(6):
                y[i * 6 + j] = i + 1
    if int(train_kfold) <= 0:
        index = np.array(random.sample(range(len(y)), len(y)))
        x = x[index]
        y = y[index]
        return x, y
    else:
        index = np.array(random.sample(range(len(y)), len(y)))
        test_index = index[0:round(len(y) / int(train_kfold))]
        train_index = index[round(len(y) / int(train_kfold))::]
        train_x = x[train_index]
        train_y = y[train_index]
        test_x = x[test_index]
        test_y = y[test_index]
        return train_x, train_y, test_x, test_y
Example No. 23
if __name__ == "__main__":    
    
    #Overview:
    #Olivetti dataset
    #Split into test and training sets
    #extract keypoints and compute SIFT features on the training images
    #cluster the SIFT features into a visual dictionary of size V
    #represent each image as a histogram of visual words
    #apply tf-idf weighting (treating the visual-word histograms like text data)
    #fit an LDA topic model on the bags of visual words
    #given test data, transform each test image into a tf-idf vector
    #use cosine similarity for image retrieval
    #display the top-K images
    #(a simplified sketch of the retrieval steps follows after this example)
                                                             
    # Load the faces datasets
    data = fetch_olivetti_faces(shuffle=True, random_state=0)
    targets = data.target
    
    data = data.images.reshape((len(data.images), -1))
    data_train = data[targets < 30]
    data_test = data[targets >= 30]
    num_train_images = data_train.shape[0]
        
    #show mean training image        
    plt.figure()
    plt.imshow(np.mean(data_train,axis=0).reshape(64,64))    
    plt.title('Olivetti Dataset (Mean Training Image)')    
    plt.show()
    
    #show random selection of images
    rnd_idx = np.arange(num_train_images)
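The overview comments at the top of this example list more steps than the truncated snippet shows. Below is a small, self-contained sketch of those retrieval steps under simplifying assumptions: flattened random patches stand in for the SIFT descriptors, and plain tf-idf with cosine similarity is used without the LDA topic model. It is not the original author's implementation; the helper names (local_descriptors, bow_histogram) and the dictionary size V = 64 are illustrative choices.

import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.cluster import MiniBatchKMeans
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics.pairwise import cosine_similarity

rng = np.random.RandomState(0)
faces = fetch_olivetti_faces(shuffle=True, random_state=0)
train_imgs = faces.images[faces.target < 30]
test_imgs = faces.images[faces.target >= 30]

def local_descriptors(img, n_patches=30, patch_size=(8, 8)):
    # stand-in for SIFT: flattened random patches used as local descriptors
    patches = extract_patches_2d(img, patch_size, max_patches=n_patches, random_state=rng)
    return patches.reshape(len(patches), -1)

# cluster the training descriptors into a visual dictionary of size V
V = 64
codebook = MiniBatchKMeans(n_clusters=V, random_state=0).fit(
    np.vstack([local_descriptors(im) for im in train_imgs]))

def bow_histogram(img):
    # quantize each descriptor to its nearest visual word and count occurrences
    words = codebook.predict(local_descriptors(img))
    return np.bincount(words, minlength=V)

train_bow = np.array([bow_histogram(im) for im in train_imgs])
query_bow = np.array([bow_histogram(test_imgs[0])])

# tf-idf weighting fitted on the training histograms, then cosine-similarity retrieval
tfidf = TfidfTransformer().fit(train_bow)
sims = cosine_similarity(tfidf.transform(query_bow), tfidf.transform(train_bow)).ravel()
print("Top-5 most similar training images:", np.argsort(sims)[::-1][:5])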
Example No. 24
	def train_images2(self):
		self.x = datasets.fetch_olivetti_faces()
		for i in range(41):
			self.all_data.addSample(self.x.data[i], self.x.target[i])
Example No. 25
from sklearn.utils import as_float_array
from sklearn import linear_model

import matplotlib.pyplot as plt

import numpy as np

from SPCA import SPCA
from SLR import SLR


#########################################################################################################
if __name__ == "__main__":

    dataset = fetch_olivetti_faces()
    data = dataset.data
    labels = dataset.target

    print 'dataset data dimensions : ', data.shape
    print 'dataset labels dimensions : ', labels.shape

    # TODO print eigenfaces normally!!!!
    #n_samples, h, w = dataset.images.shape
    #faces_images = dataset.images
    #print_faces(faces_images, labels, 20)

    # split dataset for training and evaluation
    test_percent = 0.3
    features_train, features_test, labels_train, labels_test = train_test_split(data, labels, test_size = test_percent)
Example No. 26
#
# Example visualization of the Olivetti dataset
#
import matplotlib
matplotlib.use('TkAgg')

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces

olivetti = fetch_olivetti_faces(data_home="./data", shuffle=False, random_state=0, download_if_missing=True)

# Visualize the first image
plt.imshow(olivetti.data[0].reshape(64,64),cmap='gray')
plt.show()
Example No. 27
def _is_olivetti_faces_not_available():
    try:
        datasets.fetch_olivetti_faces(download_if_missing=False)
        return False
    except IOError:
        return True
Example No. 28
import numpy as np
import tensorflow as tf

from sklearn.datasets import fetch_olivetti_faces

# Set random seed for reproducibility
np.random.seed(1000)

nb_epochs = 600
batch_size = 50
code_length = 256
width = 32
height = 32

if __name__ == '__main__':
    # Load the dataset
    faces = fetch_olivetti_faces(shuffle=True, random_state=1000)
    X_train = faces['images']

    # Create graph
    graph = tf.Graph()

    with graph.as_default():
        input_images_xl = tf.placeholder(tf.float32,
                                         shape=(None, X_train.shape[1],
                                                X_train.shape[2], 1))
        input_noisy_images_xl = tf.placeholder(tf.float32,
                                               shape=(None, X_train.shape[1],
                                                      X_train.shape[2], 1))

        input_images = tf.image.resize_images(
            input_images_xl, (width, height),
Example No. 29
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import cross_val_score
from sklearn import svm
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier

olivetti = datasets.fetch_olivetti_faces()
X, y = olivetti.data, olivetti.target

X.shape
y.shape



plt.imshow(X[0].reshape(64, 64), cmap=plt.cm.gray_r)
plt.imshow(X[1].reshape(64, 64), cmap=plt.cm.gray_r)
y[0:2]

plt.imshow(X[200].reshape(64, 64), cmap=plt.cm.gray_r)
plt.imshow(X[201].reshape(64, 64), cmap=plt.cm.gray_r)
y[200:202]

# Try SVM
clf = svm.SVC()
clf.fit(X,y)
cross_val_score(clf, X, y, cv=5, scoring='accuracy').mean()

# Try Logistic Regression
logistic = LogisticRegression()
cross_val_score(logistic, X, y, cv=5, scoring='accuracy').mean()
Example No. 30
def get_olivetti_data():
    olivetti_path = 'olivetti'
    face_data = fetch_olivetti_faces(olivetti_path)
    return face_data.images, face_data.target
Example No. 31
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.datasets import fetch_olivetti_faces
from sklearn.utils.validation import check_random_state

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV

# Load the faces datasets
data = fetch_olivetti_faces()
targets = data.target
# split the data

data = data.images.reshape((len(data.images), -1))
train = data[targets < 30]
test = data[targets >= 30]  # Test on independent people

# Test on a subset of people
n_faces = 5
rng = check_random_state(4)
face_ids = rng.randint(test.shape[0], size=(n_faces, ))
test = test[face_ids, :]

n_pixels = data.shape[1]
# Upper half of the faces
Example No. 32
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets, cluster
from sklearn.feature_extraction.image import grid_to_graph

faces = datasets.fetch_olivetti_faces(shuffle=True)
print(faces.keys())
images = faces.images
target = faces.target

X = np.reshape(images, (len(images), -1))
connectivity = grid_to_graph(*images[0].shape)

agglo = cluster.FeatureAgglomeration(connectivity=connectivity, n_clusters=32)

agglo.fit(X)

X_reduced = agglo.transform(X)

X_restored = agglo.inverse_transform(X_reduced)
images_restored = np.reshape(X_restored, images.shape)

plt.figure(1, figsize=(4, 3.5))
plt.clf()
plt.subplots_adjust(left=0.01, right=0.99, bottom=0.01, top=0.91)

for i in range(4):
    plt.subplot(3, 4, i + 1)
    plt.imshow(images[i], cmap=plt.cm.gray, interpolation='nearest')
Example No. 33
partial-fit. This is because the number of patches that they represent
has become too low, and it is better to choose a random new
cluster.
"""
print(__doc__)

import time

import pylab as pl
import numpy as np

from sklearn import datasets
from sklearn.cluster import MiniBatchKMeans
from sklearn.feature_extraction.image import extract_patches_2d

faces = datasets.fetch_olivetti_faces()

###############################################################################
# Learn the dictionary of images

print('Learning the dictionary... ')
rng = np.random.RandomState(0)
kmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)
patch_size = (20, 20)

buffer = []
index = 1
t0 = time.time()

# The online learning part: cycle over the whole dataset 4 times
index = 0
Example No. 34
from openpyxl.workbook import Workbook
from mpl_toolkits.mplot3d import axes3d

import sklearn.datasets as dt

from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV
seed = 11
rand_state = 11

# Define the color maps for plots
color_map = plt.cm.get_cmap('RdYlBu')
color_map_discrete = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ["red", "cyan", "magenta", "blue"])
# Fetch the dataset and store in X
faces = dt.fetch_olivetti_faces()
X = faces.data

# Fit a kernel density model using GridSearchCV to determine the best parameter for bandwidth
bandwidth_params = {'bandwidth': np.arange(0.01, 1, 0.05)}
grid_search = GridSearchCV(KernelDensity(), bandwidth_params)
grid_search.fit(X)
kde = grid_search.best_estimator_

# Generate/sample 8 new faces from this dataset
new_faces = kde.sample(8, random_state=rand_state)

# Show a sample of 8 original face images and 8 generated faces derived from the faces dataset
fig, ax = plt.subplots(nrows=2,
                       ncols=8,
                       figsize=(18, 6),
Example No. 35
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.decomposition import PCA
from matplotlib import font_manager, rc
font_name = font_manager.FontProperties(fname="c:/Windows/Fonts/malgun.ttf").get_name()
rc('font', family=font_name)

faces_all = fetch_olivetti_faces()

K = 20 # face images of the person with label 20
pca3 = PCA(n_components=2) # analyze two principal components
X3 = faces_all.data[faces_all.target==K]
W3 = pca3.fit_transform(X3) # reduce the dimensionality of X3 using the fitted components
X32 = pca3.inverse_transform(W3) # project back to the original dimension (so the principal components are emphasized)

face_mean = pca3.mean_.reshape(64, 64) # mean face image
face_p1 = pca3.components_[0].reshape(64, 64)
face_p2 = pca3.components_[1].reshape(64, 64)

N = 2 # 2 rows
M = 5 # 5 columns
fig = plt.figure(figsize=(10,5))
# top=1, bottom=0; hspace=0 and wspace=0.05 control the spacing between the images
plt.subplots_adjust(top=1, bottom=0, hspace=0, wspace=0.05)
for i in range(N):
    for j in range(M):
        k = i *M +j
        ax = fig.add_subplot(N,M,k+1)
        w = 1.5*(k-5) if k < 5 else 1.5*(k-4)
        # combine the mean face with varying amounts of the principal components
        # adding or subtracting the face_p1 component changes the orientation of the face
Example No. 36
def getImgAsMat(index):
    ds = datasets.fetch_olivetti_faces()
    return np.mat(ds.images[index])
Example No. 37
def load_faces():
    print("Loading Olivetti face dataset")
    print("-----------------------------")
    from sklearn.datasets import fetch_olivetti_faces
    faces = fetch_olivetti_faces(shuffle=True)
    return faces.data
Example No. 38
    else:
        second_component_features.append(i + 1)
    max_correlations.append((i, max_corr[0], max_corr[1][0]))

plot_iris(np.array(components_centered).transpose(), target, target_names)
write_answer_3(first_component_features, second_component_features)


# 4

def write_answer_4(list_pc):
    with open("pca_answer4.txt", "a") as fout:
        fout.write(" ".join([str(num) for num in list_pc]))


data = fetch_olivetti_faces(shuffle=True, random_state=0).data
image_shape = (64, 64)

d = 10
model = RandomizedPCA(n_components=d)
model.fit(data)
faces_transformed = model.transform(data)


def center_features(matrix):
    matrix_t = matrix.transpose()
    means = [np.mean(col) for col in matrix_t]
    matrix_t_centered = [[item - col[1] for item in col[0]] for col in zip(matrix_t, means)]
    return np.array(matrix_t_centered).transpose()

def get_data():  
    face_data=datasets.fetch_olivetti_faces()  
    #face_data=datasets.load_iris()  
    data=face_data.data  
    target=face_data.target  
    return data,target 
Example No. 40
                   vmin=-vmax, vmax=vmax)
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

#
# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')
n_components = 10
image_shape = (64, 64)
rng = RandomState(0)
#
# ###############################################################################
# Load faces data
dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
faces = dataset.data

n_samples, n_features = faces.shape

# global centering
faces_centered = faces - faces.mean(axis=0)

# local centering
faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)

print("Dataset consists of %d faces" % n_samples)


print("Preprocess the face data such that max norm of each image is less than 1")
X = faces_centered
def face_completion_Test01():
	import numpy as np
	import matplotlib.pyplot as plt

	from sklearn.datasets import fetch_olivetti_faces
	from sklearn.utils.validation import check_random_state

	from sklearn.ensemble import ExtraTreesRegressor
	from sklearn.neighbors import KNeighborsRegressor
	from sklearn.linear_model import LinearRegression
	from sklearn.linear_model import RidgeCV



	#load the faces datasets
	data = fetch_olivetti_faces()
	targets  = data.target

	#print len(data.data)
	#print len(data.data[0])  # data.data is a 400 x 4096 array

	# the 4096-dim vectors are the ravelled images, not the original 2-D pictures
	#face = data.data[1].reshape(64,64)  # note the difference between data and images
	#face = data.images[1]
	#face_ccw_90 = zip(*face)[::-1]
	#face_cw_90 = zip(*face[::-1])

	#plt.imshow(face_cw_90, cmap = plt.cm.gray_r)
	#plt.show()

	# to predict left/right halves instead, the original images could be rotated 90 degrees
	#for i in range(len(data.images)):
	#	face = data.images[i]
	#	data.images[i] = face_cw_90 = zip(*face[::-1])




	#print data.images[0]
	data = data.images.reshape((len(data.images), -1)) # equivalent to data.data: each image flattened into a row vector
	#print len(data[0])


	train = data[targets < 30]
	test = data[targets >= 30] # note: the test split is unrelated to the target labels

	n_faces = 5
	rng = check_random_state(4)

	#test.shape = [100, 4096]
	face_ids = rng.randint(test.shape[0], size = (n_faces, )) # randomly pick 5 indices from 0-99
	test = test[face_ids, :]

	#print face_ids

	n_pixels = data.shape[1]
	X_train = train[:, :np.ceil(0.5 * n_pixels)] # upper half of the faces
	Y_train = train[:, np.floor(0.5 * n_pixels):] # lower half of the faces
	X_test = test[:, :np.ceil(0.5 * n_pixels)] # predict the lower half from the upper half -- a multi-output problem; train and test have the same dimensionality
	Y_test = test[:, np.floor(0.5 * n_pixels):]

	# since the task is completion, this is regression rather than classification
	# ESTIMATORS is a dict mapping estimator names to estimators
	ESTIMATORS = {
		"Extra trees": ExtraTreesRegressor(n_estimators = 10, max_features = 32, random_state = 0),
		"k-nn": KNeighborsRegressor(),
		"Linear regression": LinearRegression(),
		"Ridge": RidgeCV(),
	}

	# fit each estimator and predict directly
	print "start fitting and predicting"
	y_test_predict = dict()
	for name, estimator in ESTIMATORS.items():
		estimator.fit(X_train, Y_train)
		y_test_predict[name] = estimator.predict(X_test)

	print "start plotting"


	# plotting below

	image_shape = (64, 64)

	n_cols = 1 + len(ESTIMATORS)
	plt.figure(figsize=(2.0 * n_cols, 2.26 * n_faces))
	plt.suptitle("Face completion with multi-output estimators GoGoGo", size = 16)

	for i in range(n_faces):
		true_face = np.hstack((X_test[i], Y_test[i]))

		if i:
			sub = plt.subplot(n_faces, n_cols, i * n_cols + 1)
		else:
			sub = plt.subplot(n_faces, n_cols, i * n_cols + 1, title = "true faces")

		sub.axis("off")

		sub.imshow(true_face.reshape(image_shape), cmap = plt.cm.gray, interpolation = "nearest")

		#a = true_face.reshape(image_shape)
		#sub.imshow(zip(*a)[::-1], cmap = plt.cm.gray, interpolation = "nearest")


		for j, est in enumerate(sorted(ESTIMATORS)):
			completed_face = np.hstack((X_test[i], y_test_predict[est][i]))

			if i:
				sub = plt.subplot(n_faces, n_cols, i * n_cols + 2 + j)
			else:
				sub = plt.subplot(n_faces, n_cols, i * n_cols + 2 + j, title = est)

			sub.axis("off")
			sub.imshow(completed_face.reshape(image_shape), cmap = plt.cm.gray, interpolation = "nearest")

			#b = completed_face.reshape(image_shape)
			#sub.imshow(zip(*b)[::-1], cmap = plt.cm.gray, interpolation = "nearest")

	plt.show()
Example No. 42
# -*- coding: utf-8 -*-
#[email protected]
"""
对原始图片进行可视化
"""
print(__doc__)


from time import time

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_olivetti_faces
from sklearn import decomposition as dcp

plot_grid = (3, 3)
image_shape = (64, 64)

dataset = fetch_olivetti_faces(data_home="data", shuffle=True)
faces = dataset.data

n_samples, n_features = faces.shape


plt.figure(1)
for itr in range(9):
    plt.subplot(3, 3, itr+1)
    plt.imshow(np.reshape(faces[itr+66], image_shape))

plt.show()
Example No. 43
import pylab
import pickle
import numpy
import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
from convolutional_mlp import LeNetConvPoolLayer
from sklearn import datasets
# load the saved model
layer0,layer1,layer2,layer3 = pickle.load(open('weight.pkl','rb'))

face=datasets.fetch_olivetti_faces(shuffle=True)
x=face.data[0,:]
x=x.reshape(1,1,64,64)

input = T.tensor4(name='input')

conv_out = conv.conv2d(input,filters=layer0.params[0])
pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=(2,2),
            ignore_border=True
        )
output = T.tanh(pooled_out + layer0.params[1].dimshuffle('x', 0, 'x', 'x'))
f = theano.function([input], output)
filtered_img = f(x)
pylab.gray();
pylab.subplot(1, 3, 1) 
Example No. 44
def dimension_comprasion():
    np.set_printoptions(threshold=np.inf, precision=1)
    olivetti = datasets.fetch_olivetti_faces()

    glasses = np.genfromtxt('olivetti_glasses.txt', delimiter=',').astype(int)

    # build a label vector indicating whether a given photo shows someone wearing glasses
    y_glasses = np.zeros(olivetti.data.shape[0])
    y_glasses = y_glasses.astype(int)
    y_glasses[glasses] = 1

    # how many people in the dataset wear glasses
    # print(np.where(y_glasses == 1)[0].size / float(olivetti.data.shape[0]))

    # We chose to train the classifier on the glasses label.
    y = y_glasses
    # y = y.target

    # show_some_images(olivetti.images, glasses, title="Okularnicy")

    X_train, X_test, y_train, y_test = train_test_split(olivetti.data,
                                                        y,
                                                        test_size=0.2,
                                                        stratify=y,
                                                        random_state=0)
    L, V = load_pca_or_generate(X_train)

    ##
    # Classificatione experiments
    ##
    dimensions = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70]
    errors_train = []
    errors_test = []
    #n = 50
    for n in dimensions:
        X_train_pca = X_train.dot(V[:, :n])
        X_test_pca = X_test.dot(V[:, :n])
        data_all = olivetti.data.dot(V[:, :n])

        dt = DecisionTree(impurity="impurity_entropy")
        t1 = time.time()
        dt.fit(X_train_pca, y_train)
        t2 = time.time()
        print("Time: ", t2 - t1)

        print(dt.tree_)
        print(dt.tree_.shape)
        print(np.sum(dt.tree_[:, DecisionTree.COL_CHILD_LEFT] == 0.0))

        predictions = dt.predict(X_test_pca[:10, :])

        print(predictions)
        print("Dimension:", n)
        print("Wynik klasyfikacji dla zbioru uczącego:",
              dt.score(X_train_pca, y_train))
        errors_test.append(dt.score(X_test_pca, y_test))
        print("Wynik klasyfikacji dla zbioru testowego:",
              dt.score(X_test_pca, y_test))
        print("Wynik klasyfikacji dla zbioru testowego (custom):",
              np.sum(y_test == dt.predict(X_test_pca)) / y_test.size)

    plt.figure()
    plt.plot(dimensions, errors_test)
    plt.title("Dokładność testowa dla liczby użytych cech")
    plt.xlabel("Lizba użytych cech")
    plt.ylabel("Dokładność testowy")
    plt.savefig("docs/dimensions_test.eps")
from sparse_filtering import SparseFiltering

from sklearn.feature_extraction.image import extract_patches_2d

from sklearn.datasets import fetch_olivetti_faces

patch_width = 16  # Learn features for patches of size patch_width*patch_width
n_patches = 25  # Determines number of random patches extracted from each image
n_features = 64  # How many features are learned
maxfun = 200  # The maximal number of evaluations of the objective function
iprint = 10  # after how many function evaluations is information printed
             # by L-BFGS. -1 for no information

###############################################################################
# Load faces data, normalize faces, and convert 2d structures
dataset = fetch_olivetti_faces(shuffle=True)
faces = dataset.data

n_samples, _ = faces.shape

faces_centered = faces - faces.mean(axis=0)  # global centering

faces_centered -= \
    faces_centered.mean(axis=1).reshape(n_samples, -1)  # local centering

faces_centered = \
    faces_centered.reshape(n_samples, 64, 64)  # Reshaping to 64*64 pixel images

print("Dataset consists of %d faces" % n_samples)

###############################################################################
def loadData():
    data = fetch_olivetti_faces()
    targets = data.target
    return data, targets
Example No. 47
    def test2(self):
        

        # Display progress logs on stdout
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(levelname)s %(message)s')
        n_row, n_col = 2, 3
        n_components = n_row * n_col
        image_shape = (64, 64)
        rng = RandomState(0)
        
        ###############################################################################
        # Load faces data
        dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
        faces = dataset.data
        
        n_samples, n_features = faces.shape
        
        # global centering
        faces_centered = faces - faces.mean(axis=0)
        
        print 'faces_centered has shape: ', faces_centered.shape
        
        # local centering
        faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)
        
        print("Dataset consists of %d faces" % n_samples)
        print("each face has %d features" % n_features )
        
        # List of the different estimators, whether to center and transpose the
        # problem, and whether the transformer uses the clustering API.
        estimators = [

            ('Independent components - FastICA',
             decomposition.FastICA(n_components=n_components, whiten=True),
             True),
             
        ]
        
        
        ###############################################################################
        # Plot a sample of the input data
        
        self.plotGallery("First centered Olivetti faces", faces_centered[:n_components])

        ###############################################################################
        # Do the estimation and plot it
        
        for name, estimator, center in estimators:
            print("Extracting the top %d %s..." % (n_components, name))
            t0 = time()
            data = faces
            if center:
                data = faces_centered
            estimator.fit(data)
            train_time = (time() - t0)
            print("done in %0.3fs" % train_time)
            if hasattr(estimator, 'cluster_centers_'):
                components_ = estimator.cluster_centers_
            else:
                components_ = estimator.components_
            if hasattr(estimator, 'noise_variance_'):
                self.plotGallery("Pixelwise variance",
                             estimator.noise_variance_.reshape(1, -1), n_col=1,
                             n_row=1)
            self.plotGallery('%s - Train time %.1fs' % (name, train_time),
                         components_[:n_components])
        
        plt.show()
from sklearn.datasets import fetch_olivetti_faces
from sklearn.cluster import MiniBatchKMeans
from sklearn import decomposition

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')
n_row, n_col = 2, 3
n_components = n_row * n_col
image_shape = (64, 64)
rng = RandomState(0)

###############################################################################
# Load faces data
dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
faces = dataset.data

n_samples, n_features = faces.shape

# global centering
faces_centered = faces - faces.mean(axis=0)

# local centering
faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)

print("Dataset consists of %d faces" % n_samples)


###############################################################################
def plot_gallery(title, images, n_col=n_col, n_row=n_row):
Example No. 49
def sample_size_comparison():
    np.set_printoptions(threshold=np.inf, precision=1)
    olivetti = datasets.fetch_olivetti_faces()

    glasses = np.genfromtxt('olivetti_glasses.txt', delimiter=',').astype(int)

    # build a label vector indicating whether a given photo shows someone wearing glasses
    y_glasses = np.zeros(olivetti.data.shape[0])
    y_glasses = y_glasses.astype(int)
    y_glasses[glasses] = 1

    # how many people in the dataset wear glasses
    # print(np.where(y_glasses == 1)[0].size / float(olivetti.data.shape[0]))

    # We chose to train the classifier on the glasses label.
    y = y_glasses
    # y = y.target

    # show_some_images(olivetti.images, glasses, title="Okularnicy")

    X_train, X_test, y_train, y_test = train_test_split(olivetti.data,
                                                        y,
                                                        test_size=0.2,
                                                        stratify=y,
                                                        random_state=0)
    L, V = load_pca_or_generate(X_train)
    n = 50
    X_train_pca = X_train.dot(V[:, :n])
    X_test_pca = X_test.dot(V[:, :n])
    data_all = olivetti.data.dot(V[:, :n])

    dt = DecisionTree(impurity="impurity_entropy")
    dt.fit(X_train_pca, y_train)
    predictions = dt.predict(X_test_pca[:10, :])

    min_node_vals = np.arange(0.10, 0, -0.01)
    errors_train = np.zeros(min_node_vals.size)
    errors_test = np.zeros(min_node_vals.size)
    for i, min_node_examples in enumerate(min_node_vals):
        dt = DecisionTree(impurity="impurity_entropy",
                          min_node_examples=min_node_examples)
        t1 = time.time()
        dt.fit(X_train_pca, y_train)
        t2 = time.time()
        print("time:", t2 - t1)
        print('min node examples: ', min_node_examples)
        errors_train[i] = 1 - dt.score(X_train_pca, y_train)
        errors_test[i] = 1 - dt.score(X_test_pca, y_test)

    np.set_printoptions(threshold=np.inf, precision=5)
    best_depth = np.argmin(errors_test)
    print('BEST DEPTH:', str(best_depth), " WITH TEST ACCURACY:",
          1 - errors_test[best_depth])
    print('ERRORS TEST: ', errors_test)
    print('ERRORS TRAIN: ', errors_train)

    plt.figure()
    plt.plot(min_node_vals, errors_train, marker='o', label="train errors")
    plt.plot(min_node_vals, errors_test, marker='o', label="test errors")
    plt.xlim(np.max(min_node_vals), np.min(min_node_vals))
    plt.legend()
    plt.title("Procentowa zawartość przykładów w węźle")
    plt.savefig("docs/min_node_vals_test.eps")
Example No. 50
#(4) Report accuracy as percent correct


from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn import preprocessing
from pybrain.datasets import SupervisedDataSet
from pybrain.tools.shortcuts     import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules   import SoftmaxLayer

# # # # LOAD DATA # # # # 
oFace = datasets.fetch_olivetti_faces() # load data
X,y = oFace.data, oFace.target # extract data: [face x 1D vectorized image]
nFaces = len(np.unique(y)); nObs = len(y) # number of unique faces, total observations

# # # # SET PARAMETERS # # # #
perTrain = 0.85 # fraction of data to use for training
perTest = 1-perTrain
dimRed = 0.99  # percent variance retained for PCA dimensionality reduction

#=====================DATA PREPROCESSING=======================================
# # # Perform PCA to reduce dimensionality of data (in space) # # # 
pcInit = PCA(); pcaOne=pcInit.fit(X)
eigNorm = pcaOne.explained_variance_ratio_;  # percent variance (normalized eigenvalues)
nComps = [ n for n,i in enumerate(np.cumsum(eigNorm)) if i<dimRed ][-1:]
nComps = int(nComps[0])
print( "Reduced Dims from %d to %d" % (X.shape[1],nComps) )
def load_faces():
    X = datasets.fetch_olivetti_faces()
    X.data = X.data.astype('float64')  # convert the pixel values to float64 (assigning to .dtype would only reinterpret the buffer)
    return (NATURAL, X)
Example No. 52
def pruning_comparison_exhaustive():
    np.set_printoptions(threshold=np.inf, precision=1)
    olivetti = datasets.fetch_olivetti_faces()

    glasses = np.genfromtxt('olivetti_glasses.txt', delimiter=',').astype(int)

    # build a label vector indicating whether a given photo shows someone wearing glasses
    y_glasses = np.zeros(olivetti.data.shape[0])
    y_glasses = y_glasses.astype(int)
    y_glasses[glasses] = 1

    # how many people in the dataset wear glasses
    # print(np.where(y_glasses == 1)[0].size / float(olivetti.data.shape[0]))

    # We chose to train the classifier on the glasses label.
    y = y_glasses
    # y = y.target

    # show_some_images(olivetti.images, glasses, title="Okularnicy")

    X_train, X_test, y_train, y_test = train_test_split(olivetti.data,
                                                        y,
                                                        test_size=0.2,
                                                        stratify=y,
                                                        random_state=0)
    L, V = load_pca_or_generate(X_train)
    n = 50
    X_train_pca = X_train.dot(V[:, :n])
    X_test_pca = X_test.dot(V[:, :n])
    data_all = olivetti.data.dot(V[:, :n])

    dt = DecisionTree(impurity="impurity_entropy")
    dt.fit(X_train_pca, y_train)

    penalties = np.arange(0.015, 0.0, -0.0025)
    errors_train = np.zeros(penalties.size)
    errors_test = np.zeros(penalties.size)
    for i, penalty in enumerate(penalties):
        print('penalty', penalty)
        dt = DecisionTree(impurity="impurity_entropy",
                          pruning='exhaustive_subtrees',
                          penalty=penalty)
        t1 = time.time()
        dt.fit(X_train_pca, y_train)
        t2 = time.time()
        print('time:', t2 - t1)
        errors_train[i] = 1 - dt.score(X_train_pca, y_train)
        errors_test[i] = 1 - dt.score(X_test_pca, y_test)

    np.set_printoptions(threshold=np.inf, precision=5)
    best_penalty_index = np.argmin(errors_test)
    print('BEST PENALTY:', str(penalties[best_penalty_index]),
          " WITH TEST ACCURACY:", 1 - errors_test[best_penalty_index])
    print('ERRORS TEST: ', errors_test)
    print('ERRORS TRAIN: ', errors_train)

    plt.figure()
    plt.plot(penalties,
             errors_train,
             color='black',
             marker='o',
             label="train")
    plt.plot(penalties, errors_test, color='red', marker='o', label="test")
    plt.legend()
    plt.xlabel("penalty")
    plt.xlim(np.max(penalties), np.min(penalties))
    plt.title("Pruning - exhaustive subtrees")
    plt.savefig("docs/pruning_exhaustive.eps")
Esempio n. 53
0
def load_faces():
    print("Loading Olivetti face dataset")
    print("-----------------------------")
    from sklearn.datasets import fetch_olivetti_faces
    faces = fetch_olivetti_faces(shuffle=True)
    return faces.data
Esempio n. 54
0
def main():
    np.set_printoptions(threshold=np.inf, precision=1)
    olivetti = datasets.fetch_olivetti_faces()

    glasses = np.genfromtxt('olivetti_glasses.txt', delimiter=',').astype(int)

    # build a label vector indicating whether a given photo shows someone wearing glasses
    y_glasses = np.zeros(olivetti.data.shape[0])
    y_glasses = y_glasses.astype(int)
    y_glasses[glasses] = 1

    # how many of the images in the dataset show glasses
    # print(np.where(y_glasses == 1)[0].size / float(olivetti.data.shape[0]))

    # We chose to train the classifier on the glasses label.
    y = y_glasses
    # y = y.target

    # show_some_images(olivetti.images, glasses, title="Glasses wearers")

    X_train, X_test, y_train, y_test = train_test_split(olivetti.data,
                                                        y,
                                                        test_size=0.2,
                                                        stratify=y,
                                                        random_state=0)
    L, V = load_pca_or_generate(X_train)

    ##
    # Classification experiments
    ##
    n = 50
    X_train_pca = X_train.dot(V[:, :n])
    X_test_pca = X_test.dot(V[:, :n])
    data_all = olivetti.data.dot(V[:, :n])

    dt = DecisionTree(impurity="impurity_entropy")
    t1 = time.time()
    dt.fit(X_train_pca, y_train)
    t2 = time.time()
    print("Time: ", t2 - t1)

    print(dt.tree_)
    print(dt.tree_.shape)
    print(np.sum(dt.tree_[:, DecisionTree.COL_CHILD_LEFT] == 0.0))

    predictions = dt.predict(X_test_pca[:10, :])

    print(predictions)
    print("Wynik klasyfikacji dla zbioru uczącego:",
          dt.score(X_train_pca, y_train))
    print("Wynik klasyfikacji dla zbioru testowego:",
          dt.score(X_test_pca, y_test))
    print("Wynik klasyfikacji dla zbioru testowego (custom):",
          np.sum(y_test == dt.predict(X_test_pca)) / y_test.size)
    #
    # # show_some_images(V.T, indexes=[6, 3, 7])
    # show_some_images(X_test[:10, :], subtitles=predictions)
    #
    # ##
    # # Tests for tree depth
    # ##
    #
    # max_depth = int(np.max(dt.tree_[:, DecisionTree.COL_DEPTH]))
    # errors_train = np.zeros(max_depth + 1)
    # errors_test = np.zeros(max_depth + 1)
    # for d in range(max_depth + 1):
    #     dt = DecisionTree(impurity="impurity_entropy", max_depth=d)
    #     dt.fit(X_train_pca, y_train)
    #     print('depth: ', d, 'shape:', dt.tree_.shape)
    #     errors_train[d] = 1 - dt.score(X_train_pca, y_train)
    #     errors_test[d] = 1 - dt.score(X_test_pca, y_test)
    #
    # np.set_printoptions(threshold=np.inf, precision=5)
    # best_depth = np.argmin(errors_test)
    # print('BEST DEPTH:', str(best_depth), " WITH TEST ACCURACY:", 1 - errors_test[best_depth])
    # print('ERRORS TEST: ', errors_test)
    # print('ERRORS TRAIN: ', errors_train)
    #
    # plt.figure()
    # plt.plot(errors_train, color='black', marker='o')
    # plt.plot(errors_test, color='red', marker='o')
    # plt.show()
    #
    # ##
    # # Tests for the minimum number of examples per node
    # ##
    #
    # min_node_vals = np.arange(0.10, 0, -0.01)
    # errors_train = np.zeros(min_node_vals.size)
    # errors_test = np.zeros(min_node_vals.size)
    # for i, min_node_examples in enumerate(min_node_vals):
    #     dt = DecisionTree(impurity="impurity_entropy", min_node_examples=min_node_examples)
    #     dt.fit(X_train_pca, y_train)
    #     print('min node examples: ', min_node_examples)
    #     errors_train[i] = 1 - dt.score(X_train_pca, y_train)
    #     errors_test[i] = 1 - dt.score(X_test_pca, y_test)
    #
    # np.set_printoptions(threshold=np.inf, precision=5)
    # best_depth = np.argmin(errors_test)
    # print('BEST DEPTH:', str(best_depth), " WITH TEST ACCURACY:", 1 - errors_test[best_depth])
    # print('ERRORS TEST: ', errors_test)
    # print('ERRORS TRAIN: ', errors_train)
    #
    # plt.figure()
    # plt.plot(errors_train, color='black', marker='o')
    # plt.plot(errors_test, color='red', marker='o')
    # plt.show()
    #
    # ##
    # # How the lambda penalty affects pruning
    # ##
    # dt = DecisionTree(impurity="impurity_entropy")
    # dt.fit(X_train_pca, y_train)
    #
    # pentalties = np.arange(0.015, 0.0, -0.0025)
    # errors_train = np.zeros(pentalties.size)
    # errors_test = np.zeros(pentalties.size)
    # for i, penalty in enumerate(pentalties):
    #     print('penalty', penalty)
    #     dt = DecisionTree(impurity="impurity_entropy", pruning='greedy_subtrees', penalty=penalty)
    #     t1 = time.time()
    #     dt.fit(X_train_pca, y_train)
    #     t2 = time.time()
    #     print('time:', t2-t1)
    #     errors_train[i] = 1 - dt.score(X_train_pca, y_train)
    #     errors_test[i] = 1 - dt.score(X_test_pca, y_test)
    #
    # np.set_printoptions(threshold=np.inf, precision=5)
    # best_penalty_index = np.argmin(errors_test)
    # print('BEST PENALTY:', str(pentalties[best_penalty_index]), " WITH TEST ACCURACY:", 1 -
    #       errors_test[best_penalty_index])
    # print('ERRORS TEST: ', errors_test)
    # print('ERRORS TRAIN: ', errors_train)
    #
    # plt.figure()
    # plt.plot(errors_train, color='black', marker='o')
    # plt.plot(errors_test, color='red', marker='o')
    # plt.title("greedy")
    # plt.show()
    #
    # #
    # # Exhaustive
    # #
    # dt = DecisionTree(impurity="impurity_entropy")
    # dt.fit(X_train_pca, y_train)
    #
    # pentalties = np.arange(0.015, 0.0, -0.0025)
    # errors_train = np.zeros(pentalties.size)
    # errors_test = np.zeros(pentalties.size)
    # for i, penalty in enumerate(pentalties):
    #     print('penalty', penalty)
    #     dt = DecisionTree(impurity="impurity_entropy", pruning='exhaustive_subtrees', penalty=penalty)
    #     t1 = time.time()
    #     dt.fit(X_train_pca, y_train)
    #     t2 = time.time()
    #     print('time:', t2-t1)
    #     errors_train[i] = 1 - dt.score(X_train_pca, y_train)
    #     errors_test[i] = 1 - dt.score(X_test_pca, y_test)
    #
    # np.set_printoptions(threshold=np.inf, precision=5)
    # best_penalty_index = np.argmin(errors_test)
    # print('BEST PENALTY:', str(pentalties[best_penalty_index]), " WITH TEST ACCURACY:", 1 -
    #       errors_test[best_penalty_index])
    # print('ERRORS TEST: ', errors_test)
    # print('ERRORS TRAIN: ', errors_train)

    # svc = SVC()
    # svc.fit(X_train, y_train)
    # print("SVC Default scores [train, test]:" + str(svc.score(X_train, y_train)) + ', ' + str(svc.score(X_test, y_test)))
    #
    # svc = SVC(C=10.0**1, kernel='rbf')
    # svc.fit(X_train, y_train)
    # print("SVC Default scores [train, test]:" + str(svc.score(X_train, y_train)) + ', ' + str(svc.score(X_test, y_test)))

    # Cs = 2.0**np.arange(-8, 2)
    # svm_errs_train = np.zeros(Cs.size)
    # svm_errs_test = np.zeros(Cs.size)
    #
    # for i, C in enumerate(Cs):
    #     svc = SVC(C=C, kernel='linear')
    #     svc.fit(X_train_pca, y_train)
    #     print(
    #         "SVC Default scores [train, test]:" + str(svc.score(X_train_pca, y_train)) + ', ' + str(svc.score(X_test_pca, y_test)))
    #     svm_errs_test[i] = svc.score(X_test_pca, y_test)
    #     svm_errs_train[i] = svc.score(X_train_pca, y_train)
    #
    # plt.figure()
    # plt.plot(np.log(Cs), svm_errs_test, color='black', marker='o')
    # plt.plot(np.log(Cs), svm_errs_train, color='red', marker='o')
    # plt.title("")
    # plt.grid(True)
    # plt.show()

    #
    # Decision tree from sklearn
    #
    sklearn_tree = DecisionTreeClassifier(min_samples_split=2,
                                          criterion='entropy',
                                          random_state=0)
    t1 = time.time()
    sklearn_tree.fit(X_train_pca, y_train)
    t2 = time.time()
    print("Sklearn time", t2 - t1)
    print("Node count", sklearn_tree.tree_.node_count)
    print("Train, test", sklearn_tree.score(X_train_pca, y_train),
          sklearn_tree.score(X_test_pca, y_test))
The code below also illustrates how the construction of the forest and the
computation of the predictions can be parallelized across multiple jobs.
"""
print(__doc__)

from time import time
import pylab as pl

from sklearn.datasets import fetch_olivetti_faces
from sklearn.ensemble import ExtraTreesClassifier

# Number of cores to use to perform parallel fitting of the forest model
n_jobs = 1

# Load the faces dataset
data = fetch_olivetti_faces()
X = data.images.reshape((len(data.images), -1))
y = data.target

mask = y < 5  # Limit to 5 classes
X = X[mask]
y = y[mask]

# Build a forest and compute the pixel importances
print("Fitting ExtraTreesClassifier on faces data with %d cores..." % n_jobs)
t0 = time()
# (compute_importances has been removed from scikit-learn; feature importances
# are always available via forest.feature_importances_ after fitting)
forest = ExtraTreesClassifier(n_estimators=1000,
                              max_features=128,
                              n_jobs=n_jobs,
                              random_state=0)
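# The listing is cut off after the constructor. A minimal sketch of the continuation
# (fitting the forest and visualizing the per-pixel importances); the exact plotting
# details are an assumption for illustration.
forest.fit(X, y)
print("done in %0.3fs" % (time() - t0))

importances = forest.feature_importances_
importances = importances.reshape(data.images[0].shape)

pl.matshow(importances, cmap=pl.cm.hot)
pl.title("Pixel importances with a forest of trees")
pl.show()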
Esempio n. 56
0
import numpy as np
import cv2
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
from sklearn.datasets import fetch_olivetti_faces
import scipy
from scipy import fftpack
import time
from pprint import pprint
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score


df = fetch_olivetti_faces()


def plot_3(data, num_photo):
    fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2,3, figsize = (15,6))
    ax4.imshow(data[num_photo[0]], cmap=plt.cm.gray)
    ax5.imshow(data[num_photo[1]], cmap=plt.cm.gray)
    ax1.imshow(df.images[num_photo[0]], cmap=plt.cm.gray)
    ax2.imshow(df.images[num_photo[1]], cmap=plt.cm.gray)

    plt.show()


def plot_3_hist(data, num_photo):
    fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2,3, figsize = (15,6))
    ax4.hist(data[num_photo[0]])
from numpy.random import RandomState
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn import decomposition
 
 
n_row, n_col = 2, 3
n_components = n_row * n_col
image_shape = (64, 64)
 
 
###############################################################################
# Load faces data
dataset = fetch_olivetti_faces(shuffle=True, random_state=RandomState(0))
faces = dataset.data
 
###############################################################################
def plot_gallery(title, images, n_col=n_col, n_row=n_row):
    plt.figure(figsize=(2. * n_col, 2.26 * n_row)) 
    plt.suptitle(title, size=16)
 
    for i, comp in enumerate(images):
        plt.subplot(n_row, n_col, i + 1)
        vmax = max(comp.max(), -comp.min())
 
        plt.imshow(comp.reshape(image_shape), cmap=plt.cm.gray,
                   interpolation='nearest', vmin=-vmax, vmax=vmax)
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(0.01, 0.05, 0.99, 0.94, 0.04, 0.)
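
# plot_gallery above only renders a set of images; the listing stops before any
# decomposition is fitted. A usage sketch (an assumption for illustration): fit a
# PCA on the faces and display the leading components with plot_gallery.
plot_gallery("Olivetti faces (first %d)" % n_components,
             faces[:n_components])

estimator = decomposition.PCA(n_components=n_components, whiten=True)
estimator.fit(faces)
plot_gallery("PCA components", estimator.components_[:n_components])
plt.show()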
 
Esempio n. 58
0
import matplotlib.pyplot as plt
import numpy as np
import time
import logging
from sklearn.datasets import fetch_olivetti_faces

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

data_home = 'face_dientity/'
logging.info('Start to load dataset')
faces = fetch_olivetti_faces(data_home=data_home)
logging.info('Done with load dataset')

# 400 x 4096: 400 images, each with 4096 pixel features
X = faces.data  # training data
y = faces.target  # class target indices
targets = np.unique(faces.target)
# build a name for each class
target_names = np.array(["c%d" % t for t in targets])
# number of distinct people
n_targets = target_names.shape[0]
# total number of images, image height, image width
n_samples, h, w = faces.images.shape
print('Sample count: {}\nTarget count: {}'.format(n_samples, n_targets))
print('Image size: {}x{}\nDataset shape: {}\n'.format(w, h, X.shape))


# images is a 2-D array; each row holds one flattened image
def plot_gallery(images, titles, h, w, n_row=2, n_col=5):
    """Display a gallery of images."""
    plt.figure(figsize=(2 * n_col, 2.2 * n_row), dpi=144)
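    # The body of plot_gallery is cut off here in the listing. A minimal sketch of the
    # usual remainder (an assumption, not the original code): draw each image in a grid.
    plt.subplots_adjust(bottom=0, left=0.01, right=0.99, top=0.90, hspace=0.35)
    for i, img in enumerate(images):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(img.reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=9)
        plt.xticks(())
        plt.yticks(())
    plt.show()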
def get_faces():
    data_images = fetch_olivetti_faces()
    return [data_images.images, data_images.target]
def plot_multioutput_face_completion():

    # Load the faces datasets
    data = fetch_olivetti_faces()
    targets = data.target

    data = data.images.reshape((len(data.images), -1))
    train = data[targets < 30]
    test = data[targets >= 30]  # Test on independent people

    # Test on a subset of people
    n_faces = 5
    rng = check_random_state(4)
    face_ids = rng.randint(test.shape[0], size=(n_faces, ))
    test = test[face_ids, :]

    n_pixels = data.shape[1]
    X_train = train[:, :int(np.ceil(0.5 * n_pixels))]   # Upper half of the faces
    y_train = train[:, int(np.floor(0.5 * n_pixels)):]  # Lower half of the faces
    X_test = test[:, :int(np.ceil(0.5 * n_pixels))]
    y_test = test[:, int(np.floor(0.5 * n_pixels)):]

    # Fit estimators
    ESTIMATORS = {
        "Extra trees": ExtraTreesRegressor(n_estimators=10, max_features=32,
                                           random_state=0),
        "K-nn": KNeighborsRegressor(),
        "Linear regression": LinearRegression(),
        "Ridge": RidgeCV(),
    }

    y_test_predict = dict()
    for name, estimator in ESTIMATORS.items():
        estimator.fit(X_train, y_train)
        y_test_predict[name] = estimator.predict(X_test)

    # Plot the completed faces
    image_shape = (64, 64)

    n_cols = 1 + len(ESTIMATORS)
    plt.figure(figsize=(2. * n_cols, 2.26 * n_faces))
    plt.suptitle("Face completion with multi-output estimators", size=16)

    for i in range(n_faces):
        true_face = np.hstack((X_test[i], y_test[i]))

        if i:
            sub = plt.subplot(n_faces, n_cols, i * n_cols + 1)
        else:
            sub = plt.subplot(n_faces, n_cols, i * n_cols + 1,
                              title="true faces")


        sub.axis("off")
        sub.imshow(true_face.reshape(image_shape),
                   cmap=plt.cm.gray,
                   interpolation="nearest")

        for j, est in enumerate(sorted(ESTIMATORS)):
            completed_face = np.hstack((X_test[i], y_test_predict[est][i]))

            if i:
                sub = plt.subplot(n_faces, n_cols, i * n_cols + 2 + j)

            else:
                sub = plt.subplot(n_faces, n_cols, i * n_cols + 2 + j,
                                  title=est)

            sub.axis("off")
            sub.imshow(completed_face.reshape(image_shape),
                       cmap=plt.cm.gray,
                       interpolation="nearest")

    plt.show()
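
# The imports this function relies on are not shown in the listing. A reconstruction
# (an assumption, not the original module header), plus a call so the snippet can be
# run standalone.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.utils import check_random_state
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, RidgeCV

if __name__ == "__main__":
    plot_multioutput_face_completion()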