Example 1
def test_load_fake_lfw_people():
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                  min_faces_per_person=3,
                                  download_if_missing=False)

    # The data is cropped around the center as a rectangular bounding box
    # around the face. Colors are converted to gray levels:
    assert_equal(lfw_people.images.shape, (10, 62, 47))
    assert_equal(lfw_people.data.shape, (10, 2914))

    # the target is an array of person integer ids
    assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2])

    # names of the persons can be found using the target_names array
    expected_classes = ['Abdelatif Smith', 'Abhati Kepler', 'Onur Lopez']
    assert_array_equal(lfw_people.target_names, expected_classes)

    # It is possible to ask for the original data without any cropping or color
    # conversion, and with no limit on the number of pictures per person
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, resize=None,
                                  slice_=None, color=True,
                                  download_if_missing=False)
    assert_equal(lfw_people.images.shape, (17, 250, 250, 3))

    # the ids and class names are the same as previously
    assert_array_equal(lfw_people.target,
                       [0, 0, 1, 6, 5, 6, 3, 6, 0, 3, 6, 1, 2, 4, 5, 1, 2])
    assert_array_equal(lfw_people.target_names,
                       ['Abdelatif Smith', 'Abhati Kepler', 'Camara Alvaro',
                        'Chen Dupont', 'John Lee', 'Lin Bauman', 'Onur Lopez'])
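A minimal usage sketch to complement the tests above: fetching the real dataset (downloaded on first call) and inspecting the same attributes. The shapes in the comments are approximate and assume the common min_faces_per_person=70, resize=0.4 configuration.

from sklearn.datasets import fetch_lfw_people

lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
print(lfw_people.images.shape)   # e.g. (1288, 50, 37)
print(lfw_people.data.shape)     # e.g. (1288, 1850)
print(lfw_people.target_names)   # names of the persons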
Example 2
import pickle

import numpy as np
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA

import process_image  # project-local helper module (not shown here)


def get_eigenfaces():
    # get sklearn faces data set
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=1.0)
    n_samples, h, w = lfw_people.images.shape
    np.random.seed(42)

    # get face data
    print "Getting LFW people data from SKLearn..."
    X = lfw_people.data

    # subtract average row from each row
    print "Normalizing image array..."
    mean_image = np.mean(X, axis = 0)
    arr_norm = np.zeros([n_samples, h*w])
    arr_norm = X - mean_image

    # run pca using the singular value decomposition
    print("Running PCA of input image set. This may take a few moments.")
    pca = PCA()
    pca.fit(arr_norm)
    eigenfaces = pca.components_

    # Save images
    print "Saving eigenfaces..."
    path = 'static/eigenface_images/'
    for i, face in enumerate(eigenfaces[:50]):
        process_image.save_image_vector(path,str(i),face)
    print "Complete! Saving pickle files..."

    input_data = {'mean_image': mean_image, 'eigenfaces': eigenfaces, 'arr_norm': arr_norm}
    with open('eigenface_data.p', 'wb') as f:
        pickle.dump(input_data, f)
    print("Pickle files saved. Shutting up shop now.")
Example 3
def visualize():
  """
  Writes out various visualizations of our testing data.
  """
  print("Preparing visualizations...")

  tile_faces(fetch_lfw_people()["images"], constants.LOG_DIR + "/all_faces_tiled.png")
Example 4
File: util.py Project: ToraxXx/gsdr
def get_lfw():
    lfw = fetch_lfw_people(resize=1)
    
    lfw.data = lfw.data.astype(np.float32) / 255.0
    lfw.target = lfw.target.astype(np.int32)

    return lfw.data, lfw.target
Example 5
def dictionary_learn_ex():

    patch_shape = (18, 18)
    n_atoms = 225
    n_plot_atoms = 225
    n_nonzero_coefs = 2
    n_jobs = 8
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4, color=False)
    n_imgs, h, w = lfw_people.images.shape

    imgs = []
    for i in range(n_imgs):
        img = lfw_people.images[i, :, :].reshape((h, w))
        img /= 255.
        imgs.append(img)

    print('Extracting reference patches...')
    X = extract_patches(imgs, patch_size=patch_shape[0], scale=False,
                        n_patches=int(1e5), verbose=True, n_jobs=n_jobs)
    print("number of patches:", X.shape[1])

    se = sparse_encoder(algorithm='bomp', params={'n_nonzero_coefs': n_nonzero_coefs},
                        n_jobs=n_jobs)

    odc = online_dictionary_coder(n_atoms=n_atoms, sparse_coder=se, n_epochs=2,
                                  batch_size=1000, non_neg=False, verbose=True, n_jobs=n_jobs)
    odc.fit(X)
    D = odc.D
    plt.figure(figsize=(4.2, 4))
    for i in range(n_plot_atoms):
        plt.subplot(15, 15, i + 1)
        plt.imshow(D[:, i].reshape(patch_shape), cmap=plt.cm.gray)
        plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0, wspace=0.0, hspace=0.0)
        plt.xticks(())
        plt.yticks(())
    plt.show()
Example 6
def get_lfw(max_size=None):
    dataset = fetch_lfw_people(color=True)
    # keep only one image per person
    return image_per_label(
        dataset.images,
        dataset.target,
        dataset.target_names,
        max_size=max_size)
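image_per_label is a project-local helper that is not shown in this snippet; below is a hypothetical sketch of the behavior its name and the comment suggest (keep the first image seen for each distinct label), under the assumption that it returns the filtered images and their names.

import numpy as np

def image_per_label(images, target, target_names, max_size=None):
    # hypothetical reimplementation: keep the first image of each person
    _, first_idx = np.unique(target, return_index=True)
    order = np.sort(first_idx)
    kept, names = images[order], target_names[target[order]]
    if max_size is not None:
        kept, names = kept[:max_size], names[:max_size]
    return kept, names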
Example 7
def _download_lwf(dataset, size):
    '''
    :param dataset: path of the gzipped pickle file to write
    :return:
    '''
    from sklearn.datasets import fetch_lfw_people
    lfw_people = fetch_lfw_people(color=True, resize=size)
    f = gzip.open(dataset, 'w')
    cPkl.dump([lfw_people.images.astype('uint8'),lfw_people.target], f,
              protocol=cPkl.HIGHEST_PROTOCOL)
    f.close()
Example 8
def generateface2picsmapping(minimum_faces_per_person=1):
	lfw_people = fetch_lfw_people(min_faces_per_person=minimum_faces_per_person, resize=0.4)
	n_samples, h, w = lfw_people.images.shape
	X, y, target_names = lfw_people.data, lfw_people.target, lfw_people.target_names
	n_examples, n_features = X.shape
	face2pics = []
	print(max(y))
	for i in range(max(y) + 1):
		face2pics.append([target_names[i], []])
	for i in range(len(y)):
		face2pics[y[i]][1].append(i)
	return face2pics
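A brief hypothetical usage note for the mapping above (assuming the function is importable): each entry pairs a person's name with the indices of that person's pictures.

face2pics = generateface2picsmapping(minimum_faces_per_person=20)
name, picture_indices = face2pics[0]
print(name, len(picture_indices))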
Example 9
def getData2():
    global X, n, d, y, h, w
    lfw_people = fetch_lfw_people(min_faces_per_person=40, resize=0.4)
    n, h, w = lfw_people.images.shape
    X = lfw_people.data
    d = X.shape[1]
    y = lfw_people.target
    n_classes = lfw_people.target_names.shape[0]
    print("Total dataset size:")
    print("n_samples: %d" % n)
    print("n_features: %d" % d)
    print("n_classes: %d" % n_classes)
    return X, y, n_classes
Example 10
def getFaceData():
    # Download the data, if not already on disk and load it as numpy arrays
    lfw_people = fetch_lfw_people(data_home='.', min_faces_per_person=70, resize=0.4)
    # insert code here 
    X = lfw_people.data
    n_features = X.shape[1]
    y = lfw_people.target
    target_names = lfw_people.target_names
    n_classes = target_names.shape[0]
    n_samples, h, w = lfw_people.images.shape
    print "Total dataset size:"
    print "n_samples: %d" % n_samples
    print "n_features: %d" % n_features
    print "n_classes: %d" % n_classes
    return X,y,n_features,target_names,n_classes,n_samples,h,w
def get_data(dataset_name):
    print("Getting dataset: %s" % dataset_name)

    if dataset_name == 'lfw_people':
        X = fetch_lfw_people().data
    elif dataset_name == '20newsgroups':
        X = fetch_20newsgroups_vectorized().data[:, :100000]
    elif dataset_name == 'olivetti_faces':
        X = fetch_olivetti_faces().data
    elif dataset_name == 'rcv1':
        X = fetch_rcv1().data
    elif dataset_name == 'CIFAR':
        if handle_missing_dataset(CIFAR_FOLDER) == "skip":
            return
        X1 = [unpickle("%sdata_batch_%d" % (CIFAR_FOLDER, i + 1))
              for i in range(5)]
        X = np.vstack(X1)
        del X1
    elif dataset_name == 'SVHN':
        if handle_missing_dataset(SVHN_FOLDER) == 0:
            return
        X1 = sp.io.loadmat("%strain_32x32.mat" % SVHN_FOLDER)['X']
        X2 = [X1[:, :, :, i].reshape(32 * 32 * 3) for i in range(X1.shape[3])]
        X = np.vstack(X2)
        del X1
        del X2
    elif dataset_name == 'low rank matrix':
        X = make_low_rank_matrix(n_samples=500, n_features=int(1e4),
                                 effective_rank=100, tail_strength=.5,
                                 random_state=random_state)
    elif dataset_name == 'uncorrelated matrix':
        X, _ = make_sparse_uncorrelated(n_samples=500, n_features=10000,
                                        random_state=random_state)
    elif dataset_name == 'big sparse matrix':
        sparsity = int(1e6)
        size = int(1e6)
        small_size = int(1e4)
        data = np.random.normal(0, 1, int(sparsity / 10))
        data = np.repeat(data, 10)
        row = np.random.randint(0, small_size, sparsity)  # csr_matrix needs integer indices
        col = np.random.randint(0, small_size, sparsity)
        X = sp.sparse.csr_matrix((data, (row, col)), shape=(size, small_size))
        del data
        del row
        del col
    else:
        X = fetch_mldata(dataset_name).data
    return X
def gen_face_sets():
    people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    n_samples, h, w = people.images.shape

    data = people.data
    n_features = data.shape[1]

    target = people.target
    target_names = people.target_names
    n_classes = target_names.shape[0]

    N = len(target)
    inds = random.sample(range(N), N)  # random permutation of the indices
    n_train = int(0.8 * N)
    trainingdata = data[inds[0:n_train], :]
    trainingtarget = target[inds[0:n_train]]
    testdata = data[inds[n_train:]]
    testtarget = target[inds[n_train:]]
    return trainingdata, testdata, trainingtarget, testtarget
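The manual shuffle-and-slice above is what sklearn's train_test_split does in a single call; an equivalent sketch using that API:

from sklearn.model_selection import train_test_split

trainingdata, testdata, trainingtarget, testtarget = train_test_split(
    data, target, train_size=0.8)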
Example 13
def load_data():
    global training_data, testing_data

    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

    xs = lfw_people.data
    ys = lfw_people.target

    inputs = []
    labels = list(ys)

    for face in xs:
        V = Vol(50, 37, 1, 0.0)
        V.w = list(face)
        inputs.append(augment(V, 30))

    x_tr, x_te, y_tr, y_te = train_test_split(inputs, labels, test_size=0.25)

    training_data = list(zip(x_tr, y_tr))
    testing_data = list(zip(x_te, y_te))

    print('Dataset made...')
Example 14
parser.add_argument('--dataset','-ds',help = 'Dataset (mnist, people)', default = None)

args = parser.parse_args()

if args.mode is None: exit()

print('fetch data...')

if args.dataset == 'mnist':
	from sklearn.datasets import fetch_mldata
	data = fetch_mldata('MNIST original', data_home=".")
	images = np.array(data.data).astype(np.float32)
	images = images.reshape(images.shape[0],28,28)
elif args.dataset == 'people':
	from sklearn.datasets import fetch_lfw_people
	data = fetch_lfw_people()
	images = np.array(data.images).astype(np.float32)
else:
	print('Select dataset from (mnist, people)')
	exit()

if args.mode == 'mnist_fc':
	from mnist_fc import Generator, Discriminator
elif args.mode == 'mnist_conv':
	from mnist_conv import Generator, Discriminator
elif args.mode == 'people':
	from people_conv import Generator, Discriminator
else:
	print('Select mode from (mnist_fc, mnist_conv, people)')
	exit()
 def __init__(self):
     self.faces = fetch_lfw_people(min_faces_per_person=60)
     print('data loaded')
                                 len(batch_sizes))
        all_times['rpca'].extend([results_dict['rpca']['time']] *
                                 len(batch_sizes))
        all_errors['rpca'].extend([results_dict['rpca']['error']] *
                                  len(batch_sizes))
        for batch_size in batch_sizes:
            ipca = IncrementalPCA(n_components=n_components,
                                  batch_size=batch_size)
            results_dict = {k: benchmark(est, data) for k, est in [('ipca',
                                                                   ipca)]}
            all_times['ipca'].append(results_dict['ipca']['time'])
            all_errors['ipca'].append(results_dict['ipca']['error'])

        plot_batch_times(all_times, n_components, batch_sizes, data)
        # RandomizedPCA error is always worse (approx 100x) than other PCA
        # tests
        plot_batch_errors(all_errors, n_components, batch_sizes, data)

faces = fetch_lfw_people(resize=.2, min_faces_per_person=5)
# limit the dataset to its first 5000 images (don't care who they are!)
X = faces.data[:5000]
n_samples, h, w = faces.images.shape
n_features = X.shape[1]

X -= X.mean(axis=0)
X /= X.std(axis=0)

fixed_batch_size_comparison(X)
variable_batch_size_comparison(X)
plt.show()
Example 17
def face_feature():
    from time import time
    import logging
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import GridSearchCV
    from sklearn.datasets import fetch_lfw_people
    from sklearn.metrics import classification_report
    from sklearn.metrics import confusion_matrix
    from sklearn.decomposition import PCA
    from sklearn.svm import SVC

    # Display progress logs on stdout
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
    # Download the data from the web if it is not already available locally as numpy arrays
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    # the shape of the images array (for plotting)
    n_samples, h, w = lfw_people.images.shape

    X = lfw_people.data
    n_features = X.shape[1]

    # the label to predict is the id of the person
    y = lfw_people.target
    target_names = lfw_people.target_names
    n_classes = target_names.shape[0]
    print("Total dataset size:")
    print("n_samples: %d" % n_samples)
    print("n_features: %d" % n_features)
    print("n_classes: %d" % n_classes)

    # split into training and test sets using a stratified k-fold approach
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        random_state=42)

    # Compute a PCA (eigenfaces) on the face dataset (treated as an unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
    n_components = 150
    print("Extracting the top %d eigenfaces from %d faces" %
          (n_components, X_train.shape[0]))
    t0 = time()
    pca = PCA(n_components=n_components, svd_solver='randomized',
              whiten=True).fit(X_train)
    print("done in %0.3fs" % (time() - t0))
    eigenfaces = pca.components_.reshape((n_components, h, w))

    print("Projecting the input data on the eigenfaces orthonormal basis")
    t0 = time()
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done in %0.3fs" % (time() - t0))

    # Train an SVM classification model
    print("Fitting the classifier to the training set")
    t0 = time()
    param_grid = {
        'C': [1e3, 5e3, 1e4, 5e4, 1e5],
        'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
    }
    clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    clf = clf.fit(X_train_pca, y_train)
    print("done in %0.3fs" % (time() - t0))
    print("Best estimator found by grid search:")
    print(clf.best_estimator_)

    # Quantitative evaluation of the model quality on the test set
    print("Predicting people's names on the test set")
    t0 = time()
    y_pred = clf.predict(X_test_pca)
    print("done in %0.3fs" % (time() - t0))
    print(classification_report(y_test, y_pred, target_names=target_names))
    print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))

    # Visualize the predictor's evaluation with matplotlib
    def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
        """Helper function to plot a gallery of portraits"""
        plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
        plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
        for i in range(n_row * n_col):
            plt.subplot(n_row, n_col, i + 1)
            plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
            plt.title(titles[i], size=12)
            plt.xticks(())
            plt.yticks(())

    # plot the prediction results on a portion of the test set
    def title(y_pred, y_test, target_names, i):
        pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1]
        true_name = target_names[y_test[i]].rsplit(' ', 1)[-1]
        return 'predicted: %s\ntrue:      %s' % (pred_name, true_name)

    prediction_titles = [
        title(y_pred, y_test, target_names, i) for i in range(y_pred.shape[0])
    ]
    plot_gallery(X_test, prediction_titles, h, w)

    # plot the most distinctive eigenfaces
    eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
    plot_gallery(eigenfaces, eigenface_titles, h, w)

    plt.show()
                              'training.1600000.processed.noemoticon.csv')
    test_path = os.path.join(sentiment140_path,
                             'testdata.manual.2009.06.14.csv')

    if not os.path.exists(sentiment140_path):
        if not os.path.exists(archive_path):
            print("Downloading dataset from %s (77MB)" % SENTIMENT140_URL)
            opener = urlopen(SENTIMENT140_URL)
            open(archive_path, 'wb').write(opener.read())
        else:
            print("Found archive: " + archive_path)

        print("Extracting %s to %s" % (archive_path, sentiment140_path))
        zf = zipfile.ZipFile(archive_path)
        zf.extractall(sentiment140_path)
    print("Checking that the sentiment 140 CSV files exist...")
    assert os.path.exists(train_path)
    assert os.path.exists(test_path)
    print("=> Success!")


if __name__ == "__main__":
    datasets_folder = get_datasets_folder()
    check_sentiment140(datasets_folder)

    print("Loading Labeled Faces Data (~200MB)")
    from sklearn.datasets import fetch_lfw_people
    fetch_lfw_people(min_faces_per_person=70, resize=0.4,
                     data_home=datasets_folder)
    print("=> Success!")
Example 19
"""

import numpy as np
from time import time
import pylab as pl

from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_lfw_people
from sklearn.svm import SVC
from sklearn import grid_search

####################################################################
# Download the data (if not already on disk); load it as numpy arrays
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4,
                              color=True, funneled=False, slice_=None,
                              download_if_missing=True)

# introspect the images arrays to find the shapes (for plotting)
images = lfw_people.images / 255.
n_samples, h, w, n_colors = images.shape

# the label to predict is the id of the person
target_names = lfw_people.target_names.tolist()

####################################################################
# Pick a pair to classify such as
names = ['Tony Blair', 'Colin Powell']
#names = ['Donald Rumsfeld', 'Colin Powell']

idx0 = (lfw_people.target == target_names.index(names[0]))
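The snippet is truncated after the first mask; a hedged sketch of how the binary pair task could be finished (a hypothetical continuation reusing the names defined above, not the original code):

idx1 = (lfw_people.target == target_names.index(names[1]))
X_pair = np.vstack([images[idx0].reshape(idx0.sum(), -1),
                    images[idx1].reshape(idx1.sum(), -1)])
y_pair = np.concatenate([np.zeros(idx0.sum()), np.ones(idx1.sum())])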
Example 20
def Bot_image_recognized(image):
    print(__doc__)

    # Display progress logs on stdout
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

    ###############################################################################
    # Download the data, if not already on disk and load it as numpy arrays

    lfw_people = fetch_lfw_people(min_faces_per_person=40, resize=0.4)

    # introspect the images arrays to find the shapes (for plotting)
    n_samples, h, w = lfw_people.images.shape

    #Resize image to training data set
    pil_im = Image.open(image)
    image_resized = pil_im.resize((w, h))
    #image_resized=resizeimage.resize_thumbnail(pil_im, [h, w])
    face = array(image_resized.convert("L"), "f")
    face_1D = face.ravel()
    print(face_1D)

    # for machine learning we use the data directly (as relative pixel
    # positions info is ignored by this model)
    X = lfw_people.data
    print(X[0])
    n_features = X.shape[1]

    # the label to predict is the id of the person
    y = lfw_people.target
    target_names = lfw_people.target_names
    n_classes = target_names.shape[0]

    print("Total dataset size:")
    print("n_samples: %d" % n_samples)
    print("n_features: %d" % n_features)
    print("n_classes: %d" % n_classes)

    ###############################################################################
    # Split into a training set and a test set using a stratified k fold
    # split into a training and testing set
    X_train = X
    print(X_train.shape)
    y_train = y
    # PCA.transform expects a 2D array, so reshape the single face to (1, n_features)
    X_test = face_1D.reshape(1, -1)
    print(X_test.shape)

    ###############################################################################
    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
    n_components = 100

    print("Extracting the top %d eigenfaces from %d faces" %
          (n_components, X_train.shape[0]))
    t0 = time()
    pca = PCA(n_components=n_components, svd_solver='randomized',
              whiten=True).fit(X_train)
    print("done in %0.3fs" % (time() - t0))

    eigenfaces = pca.components_.reshape((n_components, h, w))

    print("Projecting the input data on the eigenfaces orthonormal basis")
    t0 = time()
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done in %0.3fs" % (time() - t0))

    ###############################################################################
    # Train a SVM classification model

    print("Fitting the classifier to the training set")
    t0 = time()
    param_grid = {
        'C': [1e3, 5e3, 1e4, 5e4, 1e5],
        'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
    }
    clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    clf = clf.fit(X_train_pca, y_train)
    print("done in %0.3fs" % (time() - t0))
    print("Best estimator found by grid search:")
    print(clf.best_estimator_)

    ###############################################################################
    # Quantitative evaluation of the model quality on the test set

    print("Predicting people's names on the test set")
    t0 = time()
    y_pred = clf.predict(X_test_pca)
    print("done in %0.3fs" % (time() - t0))
    print(target_names[y_pred])
    #print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))
    return target_names[y_pred]
Example 21
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import datasets
import numpy as np
import imutils
import cv2
import sklearn

print('[INFO] fetching data...')
dataset = datasets.fetch_lfw_people(min_faces_per_person=70,
                                    funneled=True,
                                    resize=0.5)
(trainData, testData, trainLabels,
 testLabels) = train_test_split(dataset.data,
                                dataset.target,
                                test_size=0.25,
                                random_state=42)

print('[INFO] training model...')
model = LogisticRegression()
model.fit(trainData, trainLabels)
print(
    classification_report(testLabels,
                          model.predict(testData),
                          target_names=dataset.target_names))
Example 22
import youtube_dl
import cv2
import face_recognition
import sklearn
from sklearn.datasets import fetch_lfw_people
lfw_people = fetch_lfw_people()

def process_video(vidfile):
    face_localizations = []
    face_encodings = []
    face_ids = []
    frame_num = 0

    # start processing video
    input_movie = cv2.VideoCapture(vidfile)
    length = int(input_movie.get(cv2.CAP_PROP_FRAME_COUNT))
    while True:
        ret, frame = input_movie.read()
        frame_num += 1
        if not ret:
            break  # end of video; stop instead of looping forever
        # bgr to rgb
        rgb_frame = frame[:, :, ::-1]

        # Find all the faces and face encodings in the current frame of video
        face_locations = face_recognition.face_locations(rgb_frame, model="hog")
        face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

        if face_encodings:
            face_encodings = face_encodings[0]
        face_ids = [None for i in face_encodings] 
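A hedged sketch of the matching step that typically follows in such a loop; known_encodings and known_names are assumed to be built elsewhere, and compare_faces is part of the face_recognition API:

matches = face_recognition.compare_faces(known_encodings,
                                         face_encodings,
                                         tolerance=0.6)
name = known_names[matches.index(True)] if True in matches else None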
Example 23
import logging

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_lfw_people
from skimage.feature import canny
from skimage.draw import circle_perimeter
from skimage.util import img_as_ubyte

count = 0

print(__doc__)

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

###############################################################################
# Download the data, if not already on disk and load it as numpy arrays

lfw_people = fetch_lfw_people(min_faces_per_person=70,
                              slice_=(slice(50, 140), slice(61, 189)),
                              resize=1.5)

faces = lfw_people.images

#for all in lfw_people
for face in faces:

    # Quick fix: only handle the first 10 faces rather than the whole set,
    # since processing everything produced a lot of images to count.
    if count < 10:

        image = face
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 10))
        ax1.imshow(image, cmap=plt.cm.gray)
Example 24
from sklearn import datasets, model_selection
from keras import utils, models, layers, optimizers

lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70, resize=.4)
n_samples, h, w = lfw_people.images.shape
x = lfw_people.images.reshape(n_samples, h, w) / 255.0
target_names = lfw_people.target_names
n_class = len(target_names)
y = utils.to_categorical(lfw_people.target, n_class)
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=.1, random_state=42)

sequential = models.Sequential()
sequential.add(layers.GRU(64, input_shape=(h, w), dropout=.25))
#sequential.add(layers.LSTM(64,input_shape=(h,w),dropout=.25))
#sequential.add(layers.LSTM(128,input_shape=(h,w),dropout=.25))
sequential.add(layers.Dense(n_class, activation='softmax'))

sequential.compile(optimizer=optimizers.Adam(),
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])
sequential.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=120)
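A brief usage note (standard Keras API): after training, the held-out accuracy can be read back with evaluate.

loss, acc = sequential.evaluate(x_test, y_test, verbose=0)
print('test accuracy: %.3f' % acc)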
Example 25
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
from sklearn.svm import SVC

print(__doc__)

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

###############################################################################
# Download the data, if not already on disk, and load it as numpy arrays.
# Here the loader is simply pointed at a local data home ('./faces'); the
# min_faces_per_person / resize filtering of the stock example is not applied.
lfw_people = fetch_lfw_people('./faces')

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_people.data
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

print("Total dataset size:")
def lfwTest01():
	from sklearn.datasets import fetch_lfw_people
	lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

	for name in lfw_people.target_names:
		print(name)
Example 27
import logging
import sys

import sklearn
from sklearn.datasets import fetch_lfw_people

logger = logging.getLogger(__name__)
logger.info(sys.version_info)
logger.info(sklearn.__version__)

import math

import numpy as np
import matplotlib.pyplot as plt

from skimage import exposure
import scipy.misc
import caffe
import scipy.io as io

# loading data

lfw_people = fetch_lfw_people(color=True)
lfw_people_color = lfw_people
target_names = lfw_people.target_names
X, y = lfw_people.data, lfw_people.target
# this does not work, deprecated
# lfw_fea_data = io.loadmat('LFW_Feature.mat')

# read targets

target_img = "0.jpg"
image = caffe.io.load_image(target_img)
target = image
plt.figure()
plt.imshow(target)

enhanced = exposure.equalize_hist(image[50:180, 60:170])
Example 28
def face_recognition_test():
    print(__doc__)

    # Display progress logs on stdout
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

    ###############################################################################
    # Download the data, if not already on disk and load it as numpy arrays

    lfw_people = fetch_lfw_people(min_faces_per_person=40, resize=0.4)

    # introspect the images arrays to find the shapes (for plotting)
    n_samples, h, w = lfw_people.images.shape

    # for machine learning we use the data directly (as relative pixel
    # positions info is ignored by this model)
    X = lfw_people.data
    n_features = X.shape[1]

    # the label to predict is the id of the person
    y = lfw_people.target
    target_names = lfw_people.target_names
    n_classes = target_names.shape[0]

    print("Total dataset size:")
    print("n_samples: %d" % n_samples)
    print("n_features: %d" % n_features)
    print("n_classes: %d" % n_classes)

    ###############################################################################
    # Split into a training set and a test set using a stratified k fold

    # split into a training and testing set
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        random_state=42)

    ###############################################################################
    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
    n_components = 100

    print("Extracting the top %d eigenfaces from %d faces" %
          (n_components, X_train.shape[0]))
    t0 = time()
    pca = PCA(n_components=n_components, svd_solver='randomized',
              whiten=True).fit(X_train)
    print("done in %0.3fs" % (time() - t0))

    eigenfaces = pca.components_.reshape((n_components, h, w))

    print("Projecting the input data on the eigenfaces orthonormal basis")
    t0 = time()
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done in %0.3fs" % (time() - t0))

    ###############################################################################
    # Train a SVM classification model

    print("Fitting the classifier to the training set")
    t0 = time()
    param_grid = {
        'C': [1e3, 5e3, 1e4, 5e4, 1e5],
        'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
    }
    clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    clf = clf.fit(X_train_pca, y_train)
    print("done in %0.3fs" % (time() - t0))
    print("Best estimator found by grid search:")
    print(clf.best_estimator_)

    ###############################################################################
    # Quantitative evaluation of the model quality on the test set

    print("Predicting people's names on the test set")
    t0 = time()
    y_pred = clf.predict(X_test_pca)
    print("done in %0.3fs" % (time() - t0))
    acc = clf.score(X_test_pca, y_test)
    print(acc)

    print(classification_report(y_test, y_pred, target_names=target_names))
    print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))

    ###############################################################################
    # Qualitative evaluation of the predictions using matplotlib

    def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
        """Helper function to plot a gallery of portraits"""
        plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
        plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
        for i in range(n_row * n_col):
            plt.subplot(n_row, n_col, i + 1)
            plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
            plt.title(titles[i], size=12)
            plt.xticks(())
            plt.yticks(())

    # plot the result of the prediction on a portion of the test set

    def title(y_pred, y_test, target_names, i):
        pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1]
        true_name = target_names[y_test[i]].rsplit(' ', 1)[-1]
        return 'predicted: %s\ntrue:      %s' % (pred_name, true_name)

    prediction_titles = [
        title(y_pred, y_test, target_names, i) for i in range(y_pred.shape[0])
    ]

    plot_gallery(X_test, prediction_titles, h, w)

    # plot the gallery of the most significant eigenfaces

    eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
    plot_gallery(eigenfaces, eigenface_titles, h, w)

    plt.show()
Example 29
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_lfw_people
import matplotlib.pyplot as plt

tf.logging.set_verbosity(tf.logging.INFO)
#############################################
# LOAD DATA AND SPLIT FOR TRAINING AND EVALUATING
# this produces cropped centered 64x64 image
lfw_people = fetch_lfw_people(min_faces_per_person=70,
                              slice_=(slice(61, 189), slice(61, 189)),
                              resize=0.5,
                              color=False)
X = lfw_people.images
y = lfw_people.target

target_names = lfw_people.target_names
n_classes = target_names.shape[0]

y = np.asarray(y, dtype=np.int32)

# split into a training and testing set
# X_train, X_test, y_train, y_test = train_test_split(
train_set, eval_set, train_lbl, eval_lbl = train_test_split(X,
                                                            y,
                                                            test_size=0.25,
                                                            random_state=10)
def load():
    lfw_people = fetch_lfw_people(min_faces_per_person=5, resize=1)
    return lfw_people
Example 31
def test_load_empty_lfw_people():
    fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA, download_if_missing=False)
from sklearn.datasets import fetch_lfw_people
import numpy as np

people = fetch_lfw_people(min_faces_per_person=20,
                          resize=0.7,
                          download_if_missing=True)
print(people.images.shape)

#Importing required packages and utilities

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt

people.target = people.target.reshape(people.target.shape[0], 1)

#preprocessing data
raw_data = people.images.reshape(
    people.images.shape[0], people.images.shape[1] * people.images.shape[2])
scaler = StandardScaler()
scaled_data = scaler.fit_transform(raw_data)

#spliting data into training and testing set

from sklearn.neighbors import KNeighborsClassifier

components = []
accuracies = []
for i in range(1, scaled_data.shape[1]):
    pca = PCA(n_components=i)
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
from sklearn.svm import SVC


print(__doc__)

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')


###############################################################################
# Download the data, if not already on disk and load it as numpy arrays

lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_people.data
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
print(y)
target_names = lfw_people.target_names
n_classes = target_names.shape[0]
Example 34
def go(options):

    # Debugging info to see if we're using the GPU
    print('devices', device_lib.list_local_devices())

    # These are people in the data that smile
    SMILING = [0, 7, 8, 11, 12, 13, 14, 20, 27, 155, 153, 154, 297]
    NONSMILING = [1, 2, 3, 6, 10, 60, 61, 136, 138, 216, 219, 280]

    # Download the data
    faces = datasets.fetch_lfw_people(data_home='.')
    x = faces.images  # x is a ~13000 by 62 by 47 array

    hidden_size = options.hidden

    # Build the encoder
    encoder = Sequential()

    encoder.add(Flatten(input_shape=(62, 47)))
    encoder.add(Dense(1024, activation='relu'))
    encoder.add(Dense(512, activation='relu'))
    encoder.add(Dense(256, activation='relu'))
    encoder.add(Dense(128, activation='relu'))
    encoder.add(Dense(hidden_size))

    # Build the decoder
    decoder = Sequential()

    decoder.add(Dense(128, activation='relu', input_dim=hidden_size))
    decoder.add(Dense(256, activation='relu'))
    decoder.add(Dense(512, activation='relu'))
    decoder.add(Dense(1024, activation='relu'))
    decoder.add(Dense(62 * 47, activation='relu'))
    decoder.add(Reshape((62, 47)))

    # Stick em together to make the autoencoder
    auto = Sequential()

    auto.add(encoder)
    auto.add(decoder)

    auto.summary()

    # Choose a loss function (MSE) and a search algorithm
    #         (Adam, a fancy version of gradient descent)
    optimizer = Adam(lr=options.lr)
    auto.compile(optimizer=optimizer, loss='mse')

    # Search for a good model
    auto.fit(x,
             x,
             epochs=options.epochs,
             batch_size=256,
             shuffle=True,
             validation_split=0.1)

    # Select the smiling and nonsmiling images from the dataset
    smiling = x[SMILING, ...]
    nonsmiling = x[NONSMILING, ...]

    # Pass them through the encoder
    smiling_latent = encoder.predict(smiling)
    nonsmiling_latent = encoder.predict(nonsmiling)

    # Compute the means for both groups
    smiling_mean = smiling_latent.mean(axis=0)
    nonsmiling_mean = nonsmiling_latent.mean(axis=0)

    # Subtract for smiling vector
    smiling_vector = smiling_mean - nonsmiling_mean

    # Making somebody smile (person 42):
    latent = encoder.predict(x[None, 42, ...])
    l_smile = latent + 0.3 * smiling_vector
    smiling = decoder.predict(l_smile)

    # Plot the frowning-to-smiling transition for several people
    # in a big PDF image
    randos = 6
    k = 9
    fig = plt.figure(figsize=(k, randos))

    for rando in range(randos):
        rando_latent = encoder.predict(x[None, rando, ...])

        # plot several images
        adds = np.linspace(-1.0, 1.0, k)

        for i in range(k):
            gen_latent = rando_latent + adds[i] * smiling_vector
            gen = decoder.predict(gen_latent)

            ax = fig.add_subplot(randos,
                                 k,
                                 rando * k + i + 1,
                                 xticks=[],
                                 yticks=[])
            ax.imshow(gen.reshape((62, 47)), cmap=plt.cm.gray)

    plt.savefig('rando-to-smiling.pdf')
Example 35
def main():
    print(__doc__)

    # Display progress logs on stdout
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')


    ###############################################################################
    # Download the data, if not already on disk and load it as numpy arrays

    lfw_people = fetch_lfw_people(min_faces_per_person=100, resize=0.4)

    # introspect the images arrays to find the shapes (for plotting)
    n_samples, h, w = lfw_people.images.shape

    # for machine learning we use the 2 data directly (as relative pixel
    # positions info is ignored by this model)
    X = lfw_people.data
    n_features = X.shape[1]

    # the label to predict is the id of the person
    y = lfw_people.target
    target_names = lfw_people.target_names
    n_classes = target_names.shape[0]

    print("Total dataset size:")
    print("n_samples: %d" % n_samples)
    print("n_features: %d" % n_features)
    print("n_classes: %d" % n_classes)


    ###############################################################################
    # Split into a training set and a test set using a stratified k fold

    # split into a training and testing set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25)

    n_train_samples = X_train.shape[0]
    n_test_samples = X_test.shape[0]

    ###############################################################################
    # legacy PCA: just computes all the eigenvectors of the training data
    # then select eigenvectors that have the highest eigenvalues

    legacy_PCA_demo = False
    if legacy_PCA_demo:
        n_components = 150

        print("Extracting the top %d eigenfaces from %d faces using legacy PCA"
              % (n_components, X_train.shape[0]))
        t0 = time()
        pca = LegacyPCA(n_components=n_components, whiten=True).fit(X_train)
        print("done in %0.3fs" % (time() - t0))

        print("Projecting the input data on the eigenfaces orthonormal basis")
        t0 = time()
        X_train_pca_legacy = pca.transform(X_train)
        X_test_pca_legacy = pca.transform(X_test)
        print("done in %0.3fs" % (time() - t0))

        print("Fitting the Prototype classifier to the training set using legacy PCA")
        t0 = time()
        clf = PrototypeClassifier().fit(X_train_pca_legacy, y_train)
        print("done in %0.3fs" % (time() - t0))

        print("Predicting people's names on the test set")
        t0 = time()
        y_pred = clf.predict(X_test_pca_legacy)
        print("done in %0.3fs" % (time() - t0))

        print(classification_report(y_test, y_pred, target_names=target_names))

        print("Fitting the SVM classifier to the training set using legacy PCA")
        t0 = time()
        param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                      'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
        clf = GridSearchCV(SVC(kernel='rbf', class_weight='auto'), param_grid)
        clf = clf.fit(X_train_pca_legacy, y_train)
        print("done in %0.3fs" % (time() - t0))
        print("Best estimator found by grid search:")
        print(clf.best_estimator_)

        print("Predicting people's names on the test set")
        t0 = time()
        y_pred = clf.predict(X_test_pca_legacy)
        print("done in %0.3fs" % (time() - t0))

        print(classification_report(y_test, y_pred, target_names=target_names))

    ##############################################################################
    # Random PCA
    random_PCA_demo = True
    if random_PCA_demo:
        n_components = 150

        print("Extracting the top %d eigenfaces from %d faces using random PCA"
              % (n_components, X_train.shape[0]))
        t0 = time()
        pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
        print("done in %0.3fs" % (time() - t0))

        eigenfaces_random = pca.components_.reshape((n_components, h, w))

        print("Projecting the input data on the eigenfaces orthonormal basis")
        t0 = time()
        X_train_pca_random = pca.transform(X_train)
        X_test_pca_random = pca.transform(X_test)
        print("done in %0.3fs" % (time() - t0))

        print("Fitting the Prototype classifier to the training set using random PCA")
        t0 = time()
        clf = PrototypeClassifier().fit(X_train_pca_random, y_train)
        print("done in %0.3fs" % (time() - t0))

        print("Predicting people's names on the test set")
        t0 = time()
        y_pred = clf.predict(X_test_pca_random)
        print("done in %0.3fs" % (time() - t0))

        print(classification_report(y_test, y_pred, target_names=target_names))

        print("Fitting the classifier to the training set using random PCA")
        t0 = time()
        param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                      'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
        clf = GridSearchCV(SVC(kernel='rbf', class_weight='auto'), param_grid)
        clf = clf.fit(X_train_pca_random, y_train)
        print("done in %0.3fs" % (time() - t0))
        print("Best estimator found by grid search:")
        print(clf.best_estimator_)

        print("Predicting people's names on the test set")
        t0 = time()
        y_pred = clf.predict(X_test_pca_random)
        print("done in %0.3fs" % (time() - t0))

        print(classification_report(y_test, y_pred, target_names=target_names))

    ##############################################################################
    # EM PCA
    em_PCA_demo = True
    if em_PCA_demo:
        n_components = 150

        print("Extracting the top %d eigenfaces from %d faces using random PCA"
              % (n_components, X_train.shape[0]))
        t0 = time()
        pca = EMPCA(n_components=n_components, whiten=True).fit(X_train)
        print("done in %0.3fs" % (time() - t0))

        eigenfaces_em = pca.components_.reshape((n_components, h, w))

        print("Projecting the input data on the eigenfaces orthonormal basis")
        t0 = time()
        X_train_pca_em = pca.transform(X_train)
        X_test_pca_em = pca.transform(X_test)
        print("done in %0.3fs" % (time() - t0))

        print("Fitting the classifier to the training set using EM PCA")
        t0 = time()
        param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                      'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
        clf = GridSearchCV(SVC(kernel='rbf', class_weight='auto'), param_grid)
        clf = clf.fit(X_train_pca_em, y_train)
        print("done in %0.3fs" % (time() - t0))
        print("Best estimator found by grid search:")
        print(clf.best_estimator_)

        print("Predicting people's names on the test set")
        t0 = time()
        y_pred = clf.predict(X_test_pca_em)
        print("done in %0.3fs" % (time() - t0))

        print(classification_report(y_test, y_pred, target_names=target_names))

    ###############################################################################
    # Classification using prototype and Euclidean metric

    ###############################################################################
    # Classification using support vector machines

    ###############################################################################
    # Qualitative evaluation of the predictions using matplotlib
    eigenfaces_legacy = pca.components_.reshape((n_components, h, w))
    eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces_legacy.shape[0])]
    plot_gallery(eigenfaces_legacy, eigenface_titles, h, w)
Example 36
        if not os.path.exists(archive_path):
            print("Downloading dataset from %s (84.1MB)" % IMDB_URL)
            opener = urlopen(IMDB_URL)
            open(archive_path, 'wb').write(opener.read())
        else:
            print("Found archive: " + archive_path)

        print("Extracting %s to %s" % (archive_path, imdb_path))

        tar = tarfile.open(archive_path, "r:gz")
        tar.extractall(path=imdb_path)
        tar.close()
        os.remove(archive_path)

    print("Checking that the IMDb train & test directories exist...")
    assert os.path.exists(train_path)
    assert os.path.exists(test_path)
    print("=> Success!")


if __name__ == "__main__":
    datasets_folder = get_datasets_folder()
    check_imdb(datasets_folder)

    print("\nLoading Labeled Faces Data (~200MB)")
    from sklearn.datasets import fetch_lfw_people
    fetch_lfw_people(min_faces_per_person=70,
                     resize=0.4,
                     data_home=datasets_folder)
    print("=> Success!")
Example 37
def main(argv = None):
    if argv is None:
        argv = sys.argv

    # cascade_path = sys.argv[1]
    # image_path = sys.argv[2]
    cascade_path = "/usr/share/OpenCV/haarcascades/haarcascade_frontalface_default.xml"
    # image_path = "/home/gbriones/Downloads/test2.jpg"
    image_path = "/home/gbriones/Downloads/tony_blair_00.jpg"
    result_path = sys.argv[3] if len(sys.argv) > 3 else None

    cascade = cv2.CascadeClassifier(cascade_path)
    # import pdb; pdb.set_trace()
    image = cv2.imread(image_path)
    if image is None:
        print("ERROR: Image did not load.")
        return 2

    gray_image, detections = cascade_detect(cascade, image)
    crop_images = detections_draw(gray_image, detections)
    resized_image = cv2.resize(crop_images[0], (37, 50))

    ###############################################################################
    # Download the data, if not already on disk and load it as numpy arrays

    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

    # introspect the images arrays to find the shapes (for plotting)
    n_samples, h, w = lfw_people.images.shape

    # for machine learning we use the data directly (as relative pixel
    # positions info is ignored by this model)
    X = lfw_people.data
    n_features = X.shape[1]

    # the label to predict is the id of the person
    y = lfw_people.target
    target_names = lfw_people.target_names
    n_classes = target_names.shape[0]
    n_components = 150
    print(target_names)

    # import pdb; pdb.set_trace()

    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X.shape[0]))
    t0 = time()
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X)
    print("done in %0.3fs" % (time() - t0))

    eigenfaces = pca.components_.reshape((n_components, h, w))
    # import pdb; pdb.set_trace()
    print("Projecting the input data on the eigenfaces orthonormal basis")
    t0 = time()
    X_pca = pca.transform(X)
    X_test_pca = pca.transform([resized_image.flatten()])
    print("done in %0.3fs" % (time() - t0))


    ###############################################################################
    # Train a SVM classification model

    print("Fitting the classifier to the training set")
    t0 = time()
    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }

    if os.path.isfile('filename.pkl'):
        clf = joblib.load('filename.pkl')
    else:
        clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced', probability=True), param_grid)
        clf = clf.fit(X_pca, y)
        joblib.dump(clf, 'filename.pkl')
    print("done in %0.3fs" % (time() - t0))
    print("Best estimator found by grid search:")
    print(clf.best_estimator_)

    print("Predicting people's names on the test set")
    t0 = time()
    y_pred = clf.predict(X_test_pca)
    print("done in %0.3fs" % (time() - t0))
    print(y_pred[0])
    print(target_names[y_pred[0]])


    print("Found {0} objects!".format(len(detections)))
    if result_path is None:
        # cv2.imshow("Objects found", resized_image)
        # cv2.waitKey(0)
        # plot_image(resized_image)
        images = [resized_image.flatten()]
        # import pdb; pdb.set_trace()
        titles = ["Original"]
        for index in range(len(y)):
            if y[index] == y_pred[0] and len(images) < 12:
                images.append(X[index])
                titles.append(target_names[y[index]])
        plot_gallery(images, titles, h, w)
        eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
        plot_gallery(eigenfaces, eigenface_titles, h, w)
        plt.show()
    else:
        cv2.imwrite(result_path, image)
Example 38
def test_load_empty_lfw_people():
    fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA, download_if_missing=False)
Example 39
from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_lfw_people
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
from sklearn.svm import SVC

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')


###############################################################################
# Download the data, if not already on disk and load it as numpy arrays

lfw_people = fetch_lfw_people(data_home='.', min_faces_per_person=70, resize=0.4)

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_people.data
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

print "Total dataset size:"
"""Demo113_NMF_LFWPeople.ipynb

# **Tame Your Python**
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
print(tf.__version__)

from sklearn.datasets import fetch_lfw_people

# Load data
dataset = fetch_lfw_people(min_faces_per_person=100)

N, H, W = dataset.images.shape
X = dataset.data
y = dataset.target
target_names = dataset.target_names

print(target_names)

print(dataset.images.shape)
print(dataset.data.shape)
print(dataset.target.shape)

print(H * W)

from sklearn.model_selection import train_test_split
Example 41
import matplotlib.pyplot as plt
import numpy as np
import time as time

#import the machine learning packages
from sklearn.datasets import fetch_lfw_people
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Download the data from the servers if it is not already present
# in the current working directory, and load it as numpy arrays
lfw_people = fetch_lfw_people(min_faces_per_person=70)


n_samples, h, w = lfw_people.images.shape


# load the feature data into one variable and the
# target (expected) values into another

X = lfw_people.data # Feature Vector
y = lfw_people.target # Target Variable


n_images = X.shape[0] # Number of Images
n_features = X.shape[1] # Number of Features
person_name = lfw_people.target_names # Name of the person in the images
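The snippet is truncated here; a hedged sketch of what its imports suggest comes next (a hypothetical continuation, not the original code):

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=42)
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
print(classification_report(y_test, clf.predict(X_test),
                            target_names=person_name))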
Example 42
def test_comp(settings, random_sid=9):
    import keras
    from keras.optimizers import SGD
    from keras.datasets import mnist, fashion_mnist, cifar10
    from skimage import filters
    from keras import backend as K
    from keras_utils import WeightHistory as WeightHistory
    from keras_utils import (RecordVariable, PrintLayerVariableStats,
                             PrintAnyVariable, SGDwithLR, eval_Kdict,
                             standarize_image_025)
    from keras_preprocessing.image import ImageDataGenerator

    K.clear_session()

    epochs = settings['Epochs']
    batch_size = settings['batch_size']

    sid = random_sid
    np.random.seed(sid)
    tf.random.set_random_seed(sid)
    tf.compat.v1.random.set_random_seed(sid)

    # A minimum sigma that is too small can affect performance, because a
    # neuron can shrink too much in the initial epochs with larger gradients
    # and get stuck!
    MIN_SIG = 0.01
    MAX_SIG = 1.0
    MIN_MU = 0.0
    MAX_MU = 1.0
    lr_dict = {'all': settings['lr_all']}  #0.1 is default for MNIST
    mom_dict = {'all': 0.9}
    decay_dict = {'all': 0.9}
    clip_dict = {}
    for i, n in enumerate(settings['nhidden']):
        lr_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.01})
        lr_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.01})
        lr_dict.update({'focus-' + str(i + 1) + '/Weights:0': 0.1})

        mom_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.9})
        mom_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.9})

        decay_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.5})
        decay_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.9})

        clip_dict.update(
            {'focus-' + str(i + 1) + '/Sigma:0': (MIN_SIG, MAX_SIG)})
        clip_dict.update({'focus-' + str(i + 1) + '/Mu:0': (MIN_MU, MAX_MU)})

    print("Loading dataset")
    if settings['dset'] == 'mnist':
        # input image dimensions
        img_rows, img_cols = 28, 28
        # the data, split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        n_channels = 1

        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64')
        if settings['cnn_model']:

            decay_epochs = [e_i * 30, e_i * 100]

    elif settings['dset'] == 'cifar10':
        img_rows, img_cols = 32, 32
        n_channels = 3

        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        # works good as high as 77 for cnn-focus
        #decay_dict = {'all':0.9, 'focus-1/Sigma:0': 1.1,'focus-1/Mu:0':0.9,
        #          'focus-2/Sigma:0': 1.1,'focus-2/Mu:0': 0.9}
        #if cnn_model: batch_size=256 # this works better than 500 for cifar-10
        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 30, e_i * 80, e_i * 120, e_i * 180],
                                dtype='int64')
        #decay_epochs =np.array([e_i*10], dtype='int64')

    elif settings['dset'] == 'fashion':
        img_rows, img_cols = 28, 28
        n_channels = 1

        (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64')
        if settings['cnn_model']:
            decay_dict = {
                'all': 0.9,
                'focus-1/Sigma:0': 0.9,
                'focus-1/Mu:0': 0.9,
                'focus-2/Sigma:0': 0.9,
                'focus-2/Mu:0': 0.9
            }

            decay_epochs = [e_i * 30, e_i * 100]

    elif settings['dset'] == 'mnist-clut':

        img_rows, img_cols = 60, 60
        # the data, split between train and test sets

        folder = '/media/home/rdata/image/'
        data = np.load(folder + "mnist_cluttered_60x60_6distortions.npz")

        x_train, y_train = data['x_train'], np.argmax(data['y_train'], axis=-1)
        x_valid, y_valid = data['x_valid'], np.argmax(data['y_valid'], axis=-1)
        x_test, y_test = data['x_test'], np.argmax(data['y_test'], axis=-1)
        x_train = np.vstack((x_train, x_valid))
        y_train = np.concatenate((y_train, y_valid))
        n_channels = 1

        lr_dict = {'all': 0.01}

        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64')
        if settings['cnn_model']:
            decay_epochs = [e_i * 30, e_i * 100]

    elif settings['dset'] == 'lfw_faces':
        from sklearn.datasets import fetch_lfw_people
        lfw_people = fetch_lfw_people(min_faces_per_person=20, resize=0.4)

        # introspect the images arrays to find the shapes (for plotting)
        n_samples, img_rows, img_cols = lfw_people.images.shape
        n_channels = 1

        X = lfw_people.data
        n_features = X.shape[1]

        # the label to predict is the id of the person
        y = lfw_people.target
        target_names = lfw_people.target_names
        n_classes = target_names.shape[0]

        print("Total dataset size:")
        print("n_samples: %d" % n_samples)
        print("n_features: %d" % n_features)
        print("n_classes: %d" % n_classes)

        from sklearn.model_selection import train_test_split

        #X -= X.mean()
        #X /= X.std()
        #split into a training and testing set
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.25,
                                                            random_state=42)

        import matplotlib.pyplot as plt

        plt.imshow(X[0].reshape((img_rows, img_cols)))
        plt.show()
        lr_dict = {'all': 0.001}

        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 50, e_i * 100, e_i * 150],
                                dtype='int64')

    num_classes = np.unique(y_train).shape[0]
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], n_channels, img_rows,
                                  img_cols)
        x_test = x_test.reshape(x_test.shape[0], n_channels, img_rows,
                                img_cols)
        input_shape = (n_channels, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols,
                                  n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols,
                                n_channels)
        input_shape = (img_rows, img_cols, n_channels)
    if settings['dset'] != 'mnist-clut':

        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')

        x_train, _, x_test = standarize_image_025(x_train, tst=x_test)
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols,
                                  n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols,
                                n_channels)

    input_shape = (img_rows, img_cols, n_channels)

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    sigma_reg = settings['focus_sigma_reg']
    sigma_reg = keras.regularizers.l2(
        sigma_reg) if sigma_reg is not None else sigma_reg
    settings['focus_sigma_reg'] = sigma_reg
    if settings['cnn_model']:
        model = create_cnn_model(input_shape, num_classes, settings=settings)
    else:
        model = create_simple_model(input_shape,
                                    num_classes,
                                    settings=settings)

    model.summary()

    print(lr_dict)
    print(mom_dict)
    print(decay_dict)
    print(clip_dict)

    opt = SGDwithLR(lr_dict, mom_dict, decay_dict, clip_dict,
                    decay_epochs)  #, decay=None)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])

    stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: ']
    stat_func_list = [np.max, np.mean, np.min, np.var, np.std]
    #callbacks = [tb]
    callbacks = []

    if settings['neuron'] == 'focused':
        pr_1 = PrintLayerVariableStats("focus-1", "Weights:0", stat_func_list,
                                       stat_func_name)
        pr_2 = PrintLayerVariableStats("focus-1", "Sigma:0", stat_func_list,
                                       stat_func_name)
        pr_3 = PrintLayerVariableStats("focus-1", "Mu:0", stat_func_list,
                                       stat_func_name)
        rv_weights_1 = RecordVariable("focus-1", "Weights:0")
        rv_sigma_1 = RecordVariable("focus-1", "Sigma:0")
        rv_mu_1 = RecordVariable("focus-1", "Mu:0")
        print_lr_rates_callback = keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: print(
                "iter: ", K.eval(model.optimizer.iterations), " LR RATES :",
                eval_Kdict(model.optimizer.lr)))

        callbacks += [
            pr_1, pr_2, pr_3, rv_weights_1, rv_sigma_1, rv_mu_1,
            print_lr_rates_callback
        ]

    if not settings['augment']:
        print('Not using data augmentation.')
        history = model.fit(x_train,
                            y_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            validation_data=(x_test, y_test),
                            shuffle=True,
                            callbacks=callbacks)
    else:
        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            # set input mean to 0 over the dataset
            featurewise_center=False,
            # set each sample mean to 0
            samplewise_center=False,
            # divide inputs by std of dataset
            featurewise_std_normalization=False,
            # divide each input by its std
            samplewise_std_normalization=False,
            # apply ZCA whitening
            zca_whitening=False,
            # epsilon for ZCA whitening
            zca_epsilon=1e-06,
            # randomly rotate images in the range (deg 0 to 180)
            rotation_range=0,
            # randomly shift images horizontally
            width_shift_range=0.1,
            # randomly shift images vertically
            height_shift_range=0.1,
            # set range for random shear
            shear_range=0.,
            # set range for random zoom
            zoom_range=0.,
            # set range for random channel shifts
            channel_shift_range=0.,
            # set mode for filling points outside the input boundaries
            fill_mode='nearest',
            # value used for fill_mode = "constant"
            cval=0.,
            # randomly flip images
            horizontal_flip=True,
            # randomly flip images
            vertical_flip=False,
            # set rescaling factor (applied before any other transformation)
            rescale=None,
            # set function that will be applied on each input
            preprocessing_function=None,
            # image data format, either "channels_first" or "channels_last"
            data_format='channels_last',
            # fraction of images reserved for validation (strictly between 0 and 1)
            validation_split=0.0)

        # Compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)

        # Fit the model on the batches generated by datagen.flow().
        history = model.fit_generator(datagen.flow(x_train,
                                                   y_train,
                                                   batch_size=batch_size),
                                      validation_data=(x_test, y_test),
                                      epochs=epochs,
                                      verbose=1,
                                      workers=4,
                                      callbacks=callbacks,
                                      steps_per_epoch=x_train.shape[0] //
                                      batch_size)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    return score, history, model, callbacks
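
The MIN_SIG/MAX_SIG comment in test_comp above is the reason clip_dict exists: after each update, Sigma and Mu are expected to be clamped back into their valid ranges so a focus neuron cannot collapse early in training. A minimal numpy sketch of that clamping rule (illustrative only, not the actual SGDwithLR internals):

import numpy as np

MIN_SIG, MAX_SIG = 0.01, 1.0

def apply_clips(params, clip_dict):
    # Clamp each named parameter into its (lo, hi) range.
    for name, (lo, hi) in clip_dict.items():
        if name in params:
            params[name] = np.clip(params[name], lo, hi)
    return params

params = {'focus-1/Sigma:0': np.array([-0.3, 0.5, 2.0])}
clips = {'focus-1/Sigma:0': (MIN_SIG, MAX_SIG)}
print(apply_clips(params, clips))  # Sigma clamped to [0.01, 0.5, 1.0]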
Esempio n. 43
0
def lfwTest02():
	#from __future__ import print_function
	# Learn the interfaces and data formats of the modules involved.

	from time import time
	import logging
	import matplotlib.pyplot as plt

	from sklearn.cross_validation import train_test_split
	from sklearn.datasets import fetch_lfw_people
	from sklearn.grid_search import GridSearchCV
	from sklearn.metrics import classification_report
	from sklearn.metrics import confusion_matrix
	from sklearn.decomposition import RandomizedPCA
	from sklearn.svm import SVC

	lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4) # min_faces_per_person limits which people are loaded
	#lfw_people = fetch_lfw_people(min_faces_per_person=5, resize=0.4)

	# introspect the images arrays to find the shapes (for plotting)
	n_samples, h, w = lfw_people.images.shape

	X = lfw_people.data
	n_features = X.shape[1]

	# the label to predict is the id of the person
	y = lfw_people.target
	target_names = lfw_people.target_names
	n_classes = target_names.shape[0]

	#print target_names.shape

	#print("Total dataset size:")
	#print("n_samples: %d" % n_samples)
	#print("n_features: %d" % n_features)
	#print("n_classes: %d" % n_classes)
	#print("h: %d" % h)
	#print("w: %d" % w)
	#print lfw_people
	#print target_names


	###############################################################################
	# Split into a training set and a test set using a stratified k fold

	# split into a training and testing set
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

	# X_train is a 966 x 1850 two-dimensional matrix

	print X_train
	print len(X_train)
	print len(X_train[0])

	#singleImage = X[1].reshape(h, w)
	#
	## display the image
	#plt.imshow(singleImage, cmap = plt.cm.gray_r)
	#plt.show()


	# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
	# dataset): unsupervised feature extraction / dimensionality reduction
	n_components = 150

	print("Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0]))
	t0 = time()
	pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train) # use PCA to obtain the principal components
	print("done in %0.3fs" % (time() - t0))


	print pca.components_   # pca.components_ is a 150 x 1850 matrix -- 150 vectors, each a linear combination of all the original samples (so the feature dimensionality is unchanged)

	print len(pca.components_)
	print len(pca.components_[0])


	eigenfaces = pca.components_.reshape((n_components, h, w)) # reshape each component vector back into an image
																# each eigenface is a linear superposition of the original faces


	print("Projecting the input data on the eigenfaces orthonormal basis")
	t0 = time()
	X_train_pca = pca.transform(X_train) # project the original images into the eigenface space
	X_test_pca = pca.transform(X_test)
	print("done in %0.3fs" % (time() - t0))


	#print len(eigenfaces[0])

	print X_train_pca			# X_train_pca is a 966 x 150 matrix; each of its 150 coordinates is the projection of the original training vector onto the corresponding eigenface
	print len(X_train_pca)		# so X_train_pca holds floats, not ints
	print len(X_train_pca[0])

	#print y_train  # y_train is the label array; values 0-6 denote the classes

	# display an eigenface
	#plt.imshow(eigenfaces[-1], cmap = plt.cm.gray_r)
	#plt.show()


	# Train a SVM classification model

	print("Fitting the classifier to the training set")
	t0 = time()
	param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
				  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
	clf = GridSearchCV(SVC(kernel='rbf', class_weight='auto'), param_grid) # note: classification with an SVM, hence SVC
	clf = clf.fit(X_train_pca, y_train) # train a multi-class SVM on the PCA-projected data and y_train
	print("done in %0.3fs" % (time() - t0))
	print("Best estimator found by grid search:")
	print(clf.best_estimator_)

	# Quantitative evaluation of the model quality on the test set

	print("Predicting people's names on the test set")
	t0 = time()
	y_pred = clf.predict(X_test_pca) # the test data is projected into the same eigenface space before prediction
	print("done in %0.3fs" % (time() - t0))

	print(classification_report(y_test, y_pred, target_names=target_names))
	print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))


	###############################################################################
	# Qualitative evaluation of the predictions using matplotlib

	def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
		"""Helper function to plot a gallery of portraits"""
		plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
		plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
		for i in range(n_row * n_col):
			plt.subplot(n_row, n_col, i + 1)
			plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
			plt.title(titles[i], size=12)
			plt.xticks(())
			plt.yticks(())


	# plot the result of the prediction on a portion of the test set

	def title(y_pred, y_test, target_names, i):
		pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1]
		true_name = target_names[y_test[i]].rsplit(' ', 1)[-1]
		return 'predicted: %s\ntrue:      %s' % (pred_name, true_name)

	prediction_titles = [title(y_pred, y_test, target_names, i)
						 for i in range(y_pred.shape[0])]

	plot_gallery(X_test, prediction_titles, h, w)

	# plot the gallery of the most significant eigenfaces

	eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
	plot_gallery(eigenfaces, eigenface_titles, h, w)

	plt.show()
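
The projection comments above say that X_train_pca holds the coordinates of each face in the eigenface basis. For an unwhitened PCA this is literally centering followed by a dot product with the components, which can be verified on synthetic data (a self-contained sketch, not the LFW arrays):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X_demo = rng.rand(100, 30)

pca_demo = PCA(n_components=5).fit(X_demo)  # whiten=False by default
proj_api = pca_demo.transform(X_demo)       # library projection
proj_manual = (X_demo - pca_demo.mean_) @ pca_demo.components_.T  # center, then project

print(np.allclose(proj_api, proj_manual))   # True

With whiten=True, as used in lfwTest02, transform additionally divides each coordinate by the square root of the corresponding explained variance.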
Esempio n. 44
0
#5. Construct a one-dimensional feature vector from the information in each cell.

from skimage import data, color, feature
import skimage.data
import numpy as np
import matplotlib.pyplot as plt

image = color.rgb2gray(data.chelsea())
hog_vec, hog_vis = feature.hog(image, visualise=True)
fig, ax = plt.subplots(1, 2, figsize=(12, 6),
                       subplot_kw=dict(xticks=[], yticks=[]))
ax[0].imshow(image, cmap='gray')
ax[0].set_title('input image')
ax[1].imshow(hog_vis)
ax[1].set_title('visualization of HOG features');

#obtain a set of positive training samples
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people()
positive_patches = faces.images
positive_patches.shape

#obtain a set of negative training samples
from skimage import data, transform
imgs_to_use = ['camera', 'text', 'coins', 'moon', 'page', 'clock', 'immunohistochemistry', 'chelsea', 'coffee', 'hubble_deep_field']
images = [color.rgb2gray(getattr(data, name)()) for name in imgs_to_use]

from sklearn.feature_extraction.image import PatchExtractor
def extract_patches(img, N, scale=1.0, patch_size=positive_patches[0].shape):
    extracted_patch_size = tuple((scale * np.array(patch_size)).astype(int))
    extractor = PatchExtractor(patch_size=extracted_patch_size,
                               max_patches=N, random_state=0)
    patches = extractor.transform(img[np.newaxis])
    if scale != 1:
        patches = np.array([transform.resize(patch, patch_size)
                            for patch in patches])
    return patches
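
In the face-detection walkthrough this snippet follows, the function is then used to build the negative set by sampling patches from the non-face images at several scales (the patch count and scale list here are assumptions based on that tutorial):

negative_patches = np.vstack([extract_patches(im, 1000, scale)
                              for im in images
                              for scale in [0.5, 1.0, 2.0]])
print(negative_patches.shape)  # 10 images x 3 scales x 1000 patches each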
Esempio n. 45
0
# X, y and plot_svc_decision_function are defined in earlier cells of the source notebook
import matplotlib.pyplot as plt
from sklearn.svm import SVC

fig, ax = plt.subplots(1, 2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)

for axi, C in zip(ax, [10.0, 0.1]):
    model = SVC(kernel='linear', C=C).fit(X, y)
    axi.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
    plot_svc_decision_function(model, axi)
    axi.scatter(model.support_vectors_[:, 0],
                model.support_vectors_[:, 1],
                s=300, lw=1, facecolors='none')
    axi.set_title('C = {0:.1f}'.format(C), size=14)

#%% Example: face recognition
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(min_faces_per_person=60)
print(faces.target_names)
print(faces.images.shape)

#%% Plotting a few of these faces
fig, ax = plt.subplots(3, 5)
for i, axi in enumerate(ax.flat):
    axi.imshow(faces.images[i], cmap='bone')
    axi.set(xticks=[], yticks=[], xlabel=faces.target_names[faces.target[i]])

#%% extract fundamental components & apply svm
from sklearn.decomposition import PCA as RandomizedPCA
from sklearn.pipeline import make_pipeline

pca = RandomizedPCA(n_components=150, whiten=True, random_state=42)
svc = SVC(kernel='rbf', class_weight='balanced')
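
The snippet stops just short of chaining the two estimators; the natural continuation is a single pipeline in which the PCA projection feeds the RBF SVM (a sketch; Xtrain/ytrain stand for an assumed train split of faces.data and faces.target):

model = make_pipeline(pca, svc)
# model.fit(Xtrain, ytrain)  # Xtrain, ytrain: assumed names from a train/test split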
Esempio n. 46
0
def fetch_dataset():
    # Labeled Faces in the Wild data, keeping only people with at least 100 images
    dataset = fetch_lfw_people(min_faces_per_person=100)
    return dataset
Esempio n. 47
0
from sklearn import datasets
lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70, resize=0.4, data_home='data')
Esempio n. 48
0
print(math.acos(1/math.sqrt(2)))
print('45 deg =', math.pi/4)

#t=1
#alpha=beta=1/math.sqrt(2)

# What are the eigenvalues of the matrix X^T X, where X is the matrix of the scaled sample?
X_scaled.dot(np.array([1./np.sqrt(2), 1./np.sqrt(2)]))


print('Eigenvalues of the matrix X^T X, where X is the matrix of the scaled sample:', np.linalg.eig(X_scaled.T.dot(X_scaled))[0])
sing=np.linalg.eig(X_scaled.dot(X_scaled.T))[0]

print('What do the two numbers from the previous question mean? They tell us what fraction of the variance of the original data is explained by the principal components')

lfw_people = datasets.fetch_lfw_people(min_faces_per_person=50,
                resize=0.4, data_home='faces.dat')

print('%d objects, %d features, %d classes' % (lfw_people.data.shape[0],
      lfw_people.data.shape[1], len(lfw_people.target_names)))
#print('\nPersons:')
#for name in lfw_people.target_names:
#    print(name)

fig = plt.figure(figsize=(8, 6))

# Let's look at the dataset contents. All the images live in the lfw_people.images array.
for i in range(15):
    ax = fig.add_subplot(3, 5, i + 1, xticks=[], yticks=[])
    ax.imshow(lfw_people.images[i], cmap='gray')


# What is the minimum number of PCA components needed to explain 90% of the variance?
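
That last question can be answered directly from the cumulative explained-variance ratio; a minimal sketch using the lfw_people data loaded above:

import numpy as np
from sklearn.decomposition import PCA

pca = PCA().fit(lfw_people.data)                 # full decomposition
cum_var = np.cumsum(pca.explained_variance_ratio_)
n_90 = int(np.argmax(cum_var >= 0.9)) + 1        # first component count reaching 90%
print('components needed for 90%% variance: %d' % n_90)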
Esempio n. 49
0
class FaceRec(ModelTrafficSign):
    from sklearn.datasets import fetch_lfw_people
    people = fetch_lfw_people(color=True, min_faces_per_person=25)

    NB_LABELS = len(set(people.target))
Esempio n. 50
0
def name_title(predictions, i):
    pred_name = target_names[predictions[i]].rsplit(' ', 1)[-1]
    return pred_name


def prediction_title(predictions, actual, target_names, i):
    pred_name = target_names[predictions[i]].rsplit(' ', 1)[-1]
    true_name = target_names[actual[i]].rsplit(' ', 1)[-1]
    return 'predicted: %s\ntrue:      %s' % (pred_name, true_name)


#load data and split into test/train sets, one-hot encode labels
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

print("\nLoading LFW dataset")
lfw_people = fetch_lfw_people(data_home='.cache',
                              min_faces_per_person=70,
                              slice_=(slice(75, 200), slice(75, 200)),
                              resize=0.4,
                              color=False)
_, h, w = lfw_people.images.shape
images = lfw_people.data
labels = lfw_people.target
target_names = lfw_people.target_names
X_train, X_test, y_train, y_test = train_test_split(images,
                                                    labels,
                                                    test_size=0.25,
                                                    random_state=42)

#compute pca to extract eigenfaces
pca = PCA(n_components=50, svd_solver='randomized', whiten=True).fit(X_train)
eigenfaces = pca.components_.reshape((50, h, w))
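
With the eigenfaces computed, each image compresses to 50 PCA coordinates and can be approximately reconstructed from them; a short sketch with the objects defined above:

X_train_pca = pca.transform(X_train)               # (n_train, 50) eigenface coordinates
X_train_rec = pca.inverse_transform(X_train_pca)   # approximate faces from 50 components
print(X_train_pca.shape, X_train_rec.shape)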
Esempio n. 51
0
        data = numpy.asarray(digits.data, dtype='float32')
        target = numpy.asarray(digits.target, dtype='int32')
        nudged_x, nudged_y = nudge_dataset(data, target)
        if SCALE:
            nudged_x = preprocessing.scale(nudged_x)
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
                nudged_x, nudged_y, test_size=0.2, random_state=42)
        train_models(x_train, y_train, x_test, y_test, nudged_x.shape[1],
                     len(set(target)), numpy_rng=numpy.random.RandomState(123),
                     name='digits')

    if FACES:
        import logging
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(message)s')
        lfw_people = datasets.fetch_lfw_people(min_faces_per_person=50,
                                               resize=0.4)
        X = numpy.asarray(lfw_people.data, dtype='float32')
        if SCALE:
            X = preprocessing.scale(X)
        y = numpy.asarray(lfw_people.target, dtype='int32')
        target_names = lfw_people.target_names
        print("Total dataset size:")
        print("n samples: %d" % X.shape[0])
        print("n features: %d" % X.shape[1])
        print("n classes: %d" % target_names.shape[0])
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
                    X, y, test_size=0.2, random_state=42)

        train_models(x_train, y_train, x_test, y_test, X.shape[1],
                     len(set(y)), numpy_rng=numpy.random.RandomState(123),
                     name='faces')
Esempio n. 52
0
"""
Name : Roshan Zameer Syed
ID: 99999-2920
Project 7 : Support vector machine for the face classification problem
"""
from sklearn.datasets import fetch_lfw_people
import pandas as pd
import numpy as np

faces = fetch_lfw_people(min_faces_per_person=60)   #Importing the data set with min faces = 60
n_samples, h, w = faces.images.shape
print('Target names: ', faces.target_names)                           #Printing the target names
print('Shape of the data: ', faces.images.shape)                           #Shape of the data
X = faces.data
#print(X)
print(faces.data.shape)
n_features = faces.data.shape[1]                    # number of features = flattened image dimensionality
print(n_features)

y = faces.target
print(y)
target_names = faces.target_names
n_classes = target_names.shape[0]
print(n_classes)

print("n_samples: %d" % n_samples)                  # Print number of samples
print("n_features: %d" % n_features)                #Print number of features
print("n_classes: %d" % n_classes)                  #Print number of classes

# Splitting the data set to training and testing data set
from sklearn.model_selection import train_test_split
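
The snippet is cut off right after importing train_test_split; the split it was presumably about to perform looks like this (the test fraction and seed are assumptions):

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
print(X_train.shape, X_test.shape)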
Esempio n. 53
0
def test_load_fake_lfw_people_too_restrictive():
    fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100, download_if_missing=False)
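
On its own the body above would simply crash; in scikit-learn's test suite this scenario asserts that the fetcher raises when min_faces_per_person is stricter than the fake data allows. A pytest-style sketch of that intent (not necessarily the exact original decorator):

import pytest

def test_load_fake_lfw_people_too_restrictive():
    with pytest.raises(ValueError):
        fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
                         min_faces_per_person=100,
                         download_if_missing=False)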
Esempio n. 54
0
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_lfw_people


def plot_faces(faces):
    # Reconstructed head of a truncated snippet: the 6x6 grid is inferred
    # from the commented-out 36-face usage below; figsize is an assumption.
    fig, axes = plt.subplots(6, 6, figsize=(10, 10),
                             subplot_kw={'xticks': [],
                                         'yticks': []
                                         },
                             gridspec_kw=dict(hspace=0.1, wspace=0.1))
    for i, ax in enumerate(axes.flat):
        ax.imshow(faces[i].reshape(62, 47), cmap='bone')
    plt.show()


# def test_small():
#     X = np.random.random(size=[5, 15])
#     E = np.ones(X.shape)*1e-6
#     eRPCA(X, E)

if __name__ == '__main__':
    # test_small()
    faces = fetch_lfw_people()
    random_indexes = np.random.permutation(len(faces.data))
    X = faces.data[random_indexes]
    # example_faces = X[:36, :]
    # plot_faces (example_faces)

    import random
    random.seed(2)

    faces2 = fetch_lfw_people(min_faces_per_person=250)
    random_indexes = np.random.permutation(len(faces2.data))
    X = faces2.data[random_indexes]
    example_faces2 = X[:10, :]
    test = example_faces2[0]
    for _ in test:
        print(_)
Esempio n. 55
0
# coding:utf-8

import logging
from time import time
from sklearn.datasets import fetch_lfw_people
from sklearn.cross_validation import train_test_split
from sklearn.decomposition import RandomizedPCA
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix


logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")

lfw_people = fetch_lfw_people(data_home="D:\\My documents\\code\\dataset\\", resize=0.4)

n_samples, h, w = lfw_people.images.shape

X = lfw_people.data
n_features = X.shape[1]

y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

print ("Total dataset size:")
print ("n_samples: %d" % n_samples)
print ("n_features: %d" % n_features)
print ("n_classes: %d" % n_classes)
Esempio n. 56
0
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 30 10:48:20 2018

@author: lenovo
"""
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.python.framework import ops
import random
import struct
from sklearn.datasets import fetch_lfw_people
import math
# 1. Load the LFW data and crop each image to 80x80
lfw = fetch_lfw_people(data_home=None, resize=0.9)
n_samples, h, w = lfw.images.shape
lfw.images = lfw.images[0:13233, 16:96, 2:82]  # 13233 face images in total, each cropped to 80x80
choose_images_as_train = random.sample(range(0, 13233, 1), 10000)  # pick 10000 images for training; the remaining 3233 form the validation set
lfw_train_images = lfw.images[choose_images_as_train]
lfw_validation_images = lfw.images[np.delete(range(0, 13233, 1), choose_images_as_train)]
'''
# Plot an example image
plt.figure()
plt.imshow(lfw_train_images[0])
plt.show()
'''

# 2. Random permutation matrix
def swapRows(M, r1, r2):
    M[r1], M[r2] = M[r2], M[r1]
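
swapRows exchanges two rows in place; the "random permutation matrix" of the section heading can be built by shuffling the rows of an identity matrix, for example:

import numpy as np

def random_permutation_matrix(n, rng=np.random):
    # Reordering the rows of the identity yields a permutation matrix:
    # exactly one 1 per row and per column.
    return np.eye(n)[rng.permutation(n)]

print(random_permutation_matrix(4))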
Esempio n. 57
0
from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_lfw_people
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
from sklearn.svm import SVC

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')


###############################################################################
# Download the data, if not already on disk and load it as numpy arrays
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape
np.random.seed(42)

# for machine learning we use the data directly (as relative pixel
# position info is ignored by this model)
X = lfw_people.data
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]
Esempio n. 59
0
plt.legend(loc='upper right')
plt.grid()
plt.xlabel('epoch')
plt.ylabel('loss')

# pooling: dimensionality reduction that passes on only the meaningful signal
# - max pooling: passes on only the strongest signal in each window

# dropout: randomly switches signals off entirely (a regularization technique)
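
As a concrete illustration of the pooling and dropout comments above, a minimal Keras sketch (layer sizes and the input shape are arbitrary placeholders, not values from this script):

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

model = Sequential([
    Conv2D(16, (3, 3), activation='relu', input_shape=(87, 65, 1)),
    MaxPooling2D(pool_size=(2, 2)),  # keep only the strongest signal in each 2x2 window
    Dropout(0.25),                   # randomly switch off 25% of the activations
    Flatten(),
    Dense(10, activation='softmax'),
])
model.summary()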

# [ Exercise: applying deep learning models to the face-recognition data ]
# 1. ANN
# data loading
import numpy as np
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split

people = fetch_lfw_people(min_faces_per_person=20, resize=0.7)

people.data.shape

# down-sampling: keep at most 50 images per person
v_nrow = []
for i in np.unique(people.target):
    nrow = np.where(people.target == i)[0][:50]
    v_nrow = v_nrow + list(nrow)

people_x = people.data[v_nrow]
people_y = people.target[v_nrow]

# train, test data split
train_x, test_x, train_y, test_y = train_test_split(people_x,
                                                    people_y,
Esempio n. 60
0
import numpy as np
import keras
from keras import backend as K
from keras.datasets import mnist, fashion_mnist, cifar10


def load_dataset(dset, normalize_data, options):
    if dset == 'mnist':
        # input image dimensions
        img_rows, img_cols = 28, 28
        # the data, split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        print(x_train.shape)
        n_channels = 1

    elif dset == 'cifar10':
        img_rows, img_cols = 32, 32
        n_channels = 3

        (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    elif dset == 'fashion':
        img_rows, img_cols = 28, 28
        n_channels = 1

        (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

    elif dset == 'mnist-clut':

        img_rows, img_cols = 60, 60
        # the data, split between train and test sets

        #folder='/media/home/rdata/image/'
        folder = '/home/btek/datasets/image/'
        data = np.load(folder + "mnist_cluttered_60x60_6distortions.npz",
                       allow_pickle=True)
        y_trn = data['y_train']
        y_val = data['y_valid']
        y_tst = data['y_test']
        x_train, y_train = data['x_train'], np.argmax(y_trn, axis=-1)
        x_valid, y_valid = data['x_valid'], np.argmax(y_val, axis=-1)
        x_test, y_test = data['x_test'], np.argmax(y_tst, axis=-1)
        x_train = np.vstack((x_train, x_valid))
        y_train = np.concatenate((y_train, y_valid))
        n_channels = 1
        normalize_data = False  # this dataset is already somehow normalized

        #decay_epochs =[e_i*30,e_i*100]

    elif dset == 'lfw_faces':
        from sklearn.datasets import fetch_lfw_people
        lfw_people = fetch_lfw_people(min_faces_per_person=20, resize=0.4)

        # introspect the images arrays to find the shapes (for plotting)
        n_samples, img_rows, img_cols = lfw_people.images.shape
        n_channels = 1

        X = lfw_people.data
        n_features = X.shape[1]

        # the label to predict is the id of the person
        y = lfw_people.target
        target_names = lfw_people.target_names
        num_classes = target_names.shape[0]

        from sklearn.model_selection import train_test_split

        #X -= X.mean()
        #X /= X.std()
        #split into a training and testing set
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.25)

    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], n_channels, img_rows,
                                  img_cols)
        x_test = x_test.reshape(x_test.shape[0], n_channels, img_rows,
                                img_cols)
        input_shape = (n_channels, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols,
                                  n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols,
                                n_channels)
        input_shape = (img_rows, img_cols, n_channels)
        ''' why did I write this?? BTEK
        if(n_channels==1):
            x_train = np.repeat(x_train,3, axis=3)
            x_test = np.repeat(x_test,3, axis=3)
            n_channels=3
            input_shape = (img_rows, img_cols, n_channels)
        '''
    num_classes = np.shape(np.unique(y_train))[0]
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    if normalize_data:
        # Simple normalization to [0, 1]
        #x_train /= 255
        #x_test /= 255

        # Standard normalization: mean 0, std 1, per input feature.
        # This normalization is very bad for images (BTEK):
        #trn_mn = np.mean(x_train, axis=0)
        #trn_std = np.std(x_train, axis=0)

        # Standardize with mean 127 and per-image std.
        # This does not have zero mean (some values are negative), but
        # std is 1.0; some earlier paper results were probably taken with this.
        # trn_mn = np.mean(x_train)
        # trn_std = np.std(x_train)
        # x_train -= 127.0   # I use this because other normalizations do not create symmetric distribution.
        # x_test -= 127.0
        # x_train/=(trn_std+1e-7)
        # x_test/=(trn_std+1e-7)
        # print("Data normed Mean(train):", np.mean(x_train), " Std(train):", np.std(x_train))
        # print("Data normed Mean(test):", np.mean(x_test), " Std(test):", np.std(x_test))

        # Standardize with mean 127 and per-image std.
        # This has neither zero mean nor unit std.
        #        x_train /= (255/4)
        #        x_test /= (255/4)
        #        x_train -= 2.0
        #        x_test  -=  2.0
        #        print("Data normed Mean(train):", np.mean(x_train), " Std(train):", np.std(x_train))
        #        print("Data normed Mean(test):", np.mean(x_test), " Std(test):", np.std(x_test))
        # CHANGING THIS Aug 2020 FOR THE FACES TEST
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255
        trn_mn = np.mean(x_train, axis=0)
        x_train -= trn_mn
        x_test -= trn_mn
        print('x_train shape:', x_train.shape)
        print("Data normed Mean(train):", np.mean(x_train), " Std(train):",
              np.std(x_train))
        print("Data normed Mean(test):", np.mean(x_test), " Std(test):",
              np.std(x_test))

        # non-zero normalization.


#        trn_mn = np.mean(x_train[np.nonzero(x_train)])
#        trn_std = np.std(x_train[np.nonzero(x_train)])
#        x_train[np.nonzero(x_train)] -= trn_mn
#        x_test[np.nonzero(x_test)] -= trn_mn
#        print("Data normed Mean(train):", np.mean(x_train), " Std(train):", np.std(x_train))
#        print("Data normed Mean(test):", np.mean(x_test), " Std(test):", np.std(x_test))
#        x_train/=(trn_std+1e-7)
#        x_test/=(trn_std+1e-7)
#        print("Data normed Mean(train):", np.mean(x_train), " Std(train):", np.std(x_train))
#        print("Data normed Mean(test):", np.mean(x_test), " Std(test):", np.std(x_test))

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    return x_train, y_train, x_test, y_test, input_shape, num_classes
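
For reference, a typical call of this loader under the branches above (the dataset key must be one of 'mnist', 'cifar10', 'fashion', 'mnist-clut' or 'lfw_faces'; options is unused in the fragment shown):

x_train, y_train, x_test, y_test, input_shape, num_classes = \
    load_dataset('mnist', normalize_data=True, options=None)
print(input_shape, num_classes)  # (28, 28, 1) 10 for MNIST in channels_last format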