def test_load_fake_lfw_people():
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                  min_faces_per_person=3,
                                  download_if_missing=False)

    # The data is cropped around the center as a rectangular bounding box
    # around the face. Colors are converted to gray levels:
    assert_equal(lfw_people.images.shape, (10, 62, 47))
    assert_equal(lfw_people.data.shape, (10, 2914))

    # the target is an array of person integer ids
    assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2])

    # names of the persons can be found using the target_names array
    expected_classes = ['Abdelatif Smith', 'Abhati Kepler', 'Onur Lopez']
    assert_array_equal(lfw_people.target_names, expected_classes)

    # It is possible to ask for the original data without any cropping or
    # color conversion, and with no limit on the number of pictures per person
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                  resize=None, slice_=None, color=True,
                                  download_if_missing=False)
    assert_equal(lfw_people.images.shape, (17, 250, 250, 3))

    # the ids and class names are the same as previously
    assert_array_equal(lfw_people.target,
                       [0, 0, 1, 6, 5, 6, 3, 6, 0, 3, 6, 1, 2, 4, 5, 1, 2])
    assert_array_equal(lfw_people.target_names,
                       ['Abdelatif Smith', 'Abhati Kepler', 'Camara Alvaro',
                        'Chen Dupont', 'John Lee', 'Lin Bauman', 'Onur Lopez'])
def get_eigenfaces():
    # get sklearn faces data set
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=1.0)
    n_samples, h, w = lfw_people.images.shape
    np.random.seed(42)

    # get face data
    print("Getting LFW people data from SKLearn...")
    X = lfw_people.data

    # subtract average row from each row
    print("Normalizing image array...")
    mean_image = np.mean(X, axis=0)
    arr_norm = np.zeros([n_samples, h * w])
    arr_norm = X - mean_image

    # run pca using the singular value decomposition
    print("Running PCA of input image set. This may take a few moments.")
    pca = PCA()
    pca.fit(arr_norm)
    eigenfaces = pca.components_

    # Save images
    print("Saving eigenfaces...")
    path = 'static/eigenface_images/'
    for i, face in enumerate(eigenfaces[:50]):
        process_image.save_image_vector(path, str(i), face)

    print("Complete! Saving pickle files...")
    input_data = {'mean_image': mean_image,
                  'eigenfaces': eigenfaces,
                  'arr_norm': arr_norm}
    f = open('eigenface_data.p', 'wb')
    pickle.dump(input_data, f)
    f.close()
    print("Pickle files saved. Shutting up shop now.")
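# A minimal sketch (not part of the original script) of how the pickle written
# by get_eigenfaces() could be loaded and a face reconstructed from its leading
# principal components; the file name and array shapes are assumed from the
# snippet above.
import pickle
import numpy as np

with open('eigenface_data.p', 'rb') as f:
    saved = pickle.load(f)

mean_image = saved['mean_image']   # (h*w,) average face
eigenfaces = saved['eigenfaces']   # (n_components, h*w) principal axes
arr_norm = saved['arr_norm']       # (n_samples, h*w) mean-centred faces

# Project the first centred face onto the top-k eigenfaces and reconstruct it.
k = 50
coeffs = eigenfaces[:k] @ arr_norm[0]
reconstruction = mean_image + coeffs @ eigenfaces[:k]
print(reconstruction.shape)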
def visualize():
    """Writes out various visualizations of our testing data."""
    print("Preparing visualizations...")
    tile_faces(fetch_lfw_people()["images"],
               constants.LOG_DIR + "/all_faces_tiled.png")
def get_lfw():
    lfw = fetch_lfw_people(resize=1)
    lfw.data = lfw.data.astype(np.float32) / 255.0
    lfw.target = lfw.target.astype(np.int32)
    return lfw.data, lfw.target
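# A short usage sketch (hypothetical call site, assuming get_lfw() above is in
# scope; the split ratio is illustrative):
from sklearn.model_selection import train_test_split

data, target = get_lfw()
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.25, random_state=0)
print(X_train.shape, X_test.shape)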
def dictionary_learn_ex():
    patch_shape = (18, 18)
    n_atoms = 225
    n_plot_atoms = 225
    n_nonzero_coefs = 2
    n_jobs = 8

    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4,
                                  color=False)
    n_imgs, h, w = lfw_people.images.shape

    imgs = []
    for i in range(n_imgs):
        img = lfw_people.images[i, :, :].reshape((h, w))
        img /= 255.
        imgs.append(img)

    print('Extracting reference patches...')
    X = extract_patches(imgs, patch_size=patch_shape[0], scale=False,
                        n_patches=int(1e5), verbose=True, n_jobs=n_jobs)
    print("number of patches:", X.shape[1])

    se = sparse_encoder(algorithm='bomp',
                        params={'n_nonzero_coefs': n_nonzero_coefs},
                        n_jobs=n_jobs)
    odc = online_dictionary_coder(n_atoms=n_atoms, sparse_coder=se,
                                  n_epochs=2, batch_size=1000,
                                  non_neg=False, verbose=True, n_jobs=n_jobs)
    odc.fit(X)
    D = odc.D

    plt.figure(figsize=(4.2, 4))
    for i in range(n_plot_atoms):
        plt.subplot(15, 15, i + 1)
        plt.imshow(D[:, i].reshape(patch_shape), cmap=plt.cm.gray)
        plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0,
                            wspace=0.0, hspace=0.0)
        plt.xticks(())
        plt.yticks(())
    plt.show()
def get_lfw(max_size=None):
    dataset = fetch_lfw_people(color=True)
    # keep only one image per person
    return image_per_label(
        dataset.images,
        dataset.target,
        dataset.target_names,
        max_size=max_size)
def _download_lwf(dataset, size):
    '''
    :param dataset:
    :return:
    '''
    from sklearn.datasets import fetch_lfw_people
    lfw_people = fetch_lfw_people(color=True, resize=size)
    f = gzip.open(dataset, 'w')
    cPkl.dump([lfw_people.images.astype('uint8'), lfw_people.target],
              f, protocol=cPkl.HIGHEST_PROTOCOL)
    f.close()
def generateface2picsmapping(minimum_faces_per_person=1):
    lfw_people = fetch_lfw_people(min_faces_per_person=minimum_faces_per_person,
                                  resize=0.4)
    n_samples, h, w = lfw_people.images.shape
    X, y, target_names = lfw_people.data, lfw_people.target, lfw_people.target_names
    n_examples, n_features = X.shape

    # map each person id to [name, list of image indices]
    face2pics = []
    print(max(y))
    for i in range(max(y) + 1):
        face2pics.append([target_names[i], []])
    for i in range(len(y)):
        face2pics[y[i]][1].append(i)
    return face2pics
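# A brief usage sketch (hypothetical; assumes generateface2picsmapping() above
# is in scope) showing how the person-to-image-index mapping can be inspected:
face2pics = generateface2picsmapping(minimum_faces_per_person=20)
name, indices = face2pics[0]
print(name, len(indices), indices[:5])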
def getData2():
    global X, n, d, y, h, w
    lfw_people = fetch_lfw_people(min_faces_per_person=40, resize=0.4)
    n, h, w = lfw_people.images.shape
    X = lfw_people.data
    d = X.shape[1]
    y = lfw_people.target
    n_classes = lfw_people.target_names.shape[0]

    print("Total dataset size:")
    print("n_samples: %d" % n)
    print("n_features: %d" % d)
    print("n_classes: %d" % n_classes)
    return X, y, n_classes
def getFaceData():
    # Download the data, if not already on disk, and load it as numpy arrays
    lfw_people = fetch_lfw_people(data_home='.', min_faces_per_person=70,
                                  resize=0.4)

    # insert code here
    X = lfw_people.data
    n_features = X.shape[1]

    y = lfw_people.target
    target_names = lfw_people.target_names
    n_classes = target_names.shape[0]

    n_samples, h, w = lfw_people.images.shape

    print("Total dataset size:")
    print("n_samples: %d" % n_samples)
    print("n_features: %d" % n_features)
    print("n_classes: %d" % n_classes)
    return X, y, n_features, target_names, n_classes, n_samples, h, w
def get_data(dataset_name):
    print("Getting dataset: %s" % dataset_name)

    if dataset_name == 'lfw_people':
        X = fetch_lfw_people().data
    elif dataset_name == '20newsgroups':
        X = fetch_20newsgroups_vectorized().data[:, :100000]
    elif dataset_name == 'olivetti_faces':
        X = fetch_olivetti_faces().data
    elif dataset_name == 'rcv1':
        X = fetch_rcv1().data
    elif dataset_name == 'CIFAR':
        if handle_missing_dataset(CIFAR_FOLDER) == "skip":
            return
        X1 = [unpickle("%sdata_batch_%d" % (CIFAR_FOLDER, i + 1))
              for i in range(5)]
        X = np.vstack(X1)
        del X1
    elif dataset_name == 'SVHN':
        if handle_missing_dataset(SVHN_FOLDER) == 0:
            return
        X1 = sp.io.loadmat("%strain_32x32.mat" % SVHN_FOLDER)['X']
        X2 = [X1[:, :, :, i].reshape(32 * 32 * 3)
              for i in range(X1.shape[3])]
        X = np.vstack(X2)
        del X1
        del X2
    elif dataset_name == 'low rank matrix':
        X = make_low_rank_matrix(n_samples=500, n_features=int(1e4),
                                 effective_rank=100, tail_strength=.5,
                                 random_state=random_state)
    elif dataset_name == 'uncorrelated matrix':
        X, _ = make_sparse_uncorrelated(n_samples=500, n_features=10000,
                                        random_state=random_state)
    elif dataset_name == 'big sparse matrix':
        sparsity = int(1e6)
        size = int(1e6)
        small_size = int(1e4)
        data = np.random.normal(0, 1, int(sparsity / 10))
        data = np.repeat(data, 10)
        row = np.random.uniform(0, small_size, sparsity)
        col = np.random.uniform(0, small_size, sparsity)
        X = sp.sparse.csr_matrix((data, (row, col)), shape=(size, small_size))
        del data
        del row
        del col
    else:
        X = fetch_mldata(dataset_name).data
    return X
def gen_face_sets():
    people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    n_samples, h, w = people.images.shape
    data = people.data
    n_features = data.shape[1]
    target = people.target
    target_names = people.target_names
    n_classes = target_names.shape[0]

    # shuffle the sample indices and keep 80% for training
    N = len(target)
    inds = random.sample(list(sp.arange(0, N)), N)
    n_train = int(sp.floor(0.8 * N))
    trainingdata = data[inds[0:n_train], :]
    trainingtarget = target[inds[0:n_train]]
    testdata = data[inds[n_train:]]
    testtarget = target[inds[n_train:]]
    return trainingdata, testdata, trainingtarget, testtarget
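# A minimal sketch (hypothetical; assumes gen_face_sets() above is in scope) of
# fitting a simple baseline classifier on the 80/20 split it returns:
from sklearn.linear_model import LogisticRegression

train_X, test_X, train_y, test_y = gen_face_sets()
clf = LogisticRegression(max_iter=1000).fit(train_X, train_y)
print("test accuracy: %.3f" % clf.score(test_X, test_y))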
def load_data():
    global training_data, testing_data

    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    xs = lfw_people.data
    ys = lfw_people.target

    inputs = []
    labels = list(ys)

    for face in xs:
        V = Vol(50, 37, 1, 0.0)
        V.w = list(face)
        inputs.append(augment(V, 30))

    x_tr, x_te, y_tr, y_te = train_test_split(inputs, labels, test_size=0.25)

    training_data = zip(x_tr, y_tr)
    testing_data = zip(x_te, y_te)

    print('Dataset made...')
parser.add_argument('--dataset', '-ds',
                    help='Dataset (mnist, people)', default=None)
args = parser.parse_args()

if args.mode is None:
    exit()

print('fetch data...')
if args.dataset == 'mnist':
    from sklearn.datasets import fetch_mldata
    data = fetch_mldata('MNIST original', data_home=".")
    images = np.array(data.data).astype(np.float32)
    images = images.reshape(images.shape[0], 28, 28)
elif args.dataset == 'people':
    from sklearn.datasets import fetch_lfw_people
    data = fetch_lfw_people()
    images = np.array(data.images).astype(np.float32)
else:
    print('Select dataset from (mnist, people)')
    exit()

if args.mode == 'mnist_fc':
    from mnist_fc import Generator, Discriminator
elif args.mode == 'mnist_conv':
    from mnist_conv import Generator, Discriminator
elif args.mode == 'people':
    from people_conv import Generator, Discriminator
else:
    print('Select mode from (mnist_fc, mnist_conv, people)')
    exit()
def __init__(self):
    self.faces = fetch_lfw_people(min_faces_per_person=60)
    print('data loaded')
                          len(batch_sizes))
    all_times['rpca'].extend([results_dict['rpca']['time']] *
                             len(batch_sizes))
    all_errors['rpca'].extend([results_dict['rpca']['error']] *
                              len(batch_sizes))

    for batch_size in batch_sizes:
        ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
        results_dict = {k: benchmark(est, data) for k, est in [('ipca', ipca)]}
        all_times['ipca'].append(results_dict['ipca']['time'])
        all_errors['ipca'].append(results_dict['ipca']['error'])

    plot_batch_times(all_times, n_components, batch_sizes, data)
    # RandomizedPCA error is always worse (approx 100x) than other PCA tests
    plot_batch_errors(all_errors, n_components, batch_sizes, data)


faces = fetch_lfw_people(resize=.2, min_faces_per_person=5)
# limit the dataset to the first 5000 samples
X = faces.data[:5000]
n_samples, h, w = faces.images.shape
n_features = X.shape[1]

X -= X.mean(axis=0)
X /= X.std(axis=0)

fixed_batch_size_comparison(X)
variable_batch_size_comparison(X)
plt.show()
def face_feature(): from time import time import logging import matplotlib.pyplot as plt from sklearn.model_selection import train_test_split from sklearn.model_selection import GridSearchCV from sklearn.datasets import fetch_lfw_people from sklearn.metrics import classification_report from sklearn.metrics import confusion_matrix from sklearn.decomposition import PCA from sklearn.svm import SVC # 在stdout中输出过程日志 logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') # 如果本地还没有Numpy数组格式的数据,则从网上下载。 lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4) # 图像数组的规模 n_samples, h, w = lfw_people.images.shape X = lfw_people.data n_features = X.shape[1] # 人物id是预测目的标签 y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0] print("Total dataset size:") print("n_samples: %d" % n_samples) print("n_features: %d" % n_features) print("n_classes: %d" % n_classes) # 用分层K-Fold方法划分训练集和测试集 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42) # 在人脸数据集上计算PCA(当作无标签数据集):无监督特征提取/维数压缩 n_components = 150 print("Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0])) t0 = time() pca = PCA(n_components=n_components, svd_solver='randomized', whiten=True).fit(X_train) print("done in %0.3fs" % (time() - t0)) eigenfaces = pca.components_.reshape((n_components, h, w)) print("Projecting the input data on the eigenfaces orthonormal basis") t0 = time() X_train_pca = pca.transform(X_train) X_test_pca = pca.transform(X_test) print("done in %0.3fs" % (time() - t0)) # 训练SVM分类模型 print("Fitting the classifier to the training set") t0 = time() param_grid = { 'C': [1e3, 5e3, 1e4, 5e4, 1e5], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid) clf = clf.fit(X_train_pca, y_train) print("done in %0.3fs" % (time() - t0)) print("Best estimator found by grid search:") print(clf.best_estimator_) # 在测试集上定量评估模型质量 print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca) print("done in %0.3fs" % (time() - t0)) print(classification_report(y_test, y_pred, target_names=target_names)) print(confusion_matrix(y_test, y_pred, labels=range(n_classes))) # 用matplotlib定量绘制预测器的评估 def plot_gallery(images, titles, h, w, n_row=3, n_col=4): """Helper function to plot a gallery of portraits""" plt.figure(figsize=(1.8 * n_col, 2.4 * n_row)) plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35) for i in range(n_row * n_col): plt.subplot(n_row, n_col, i + 1) plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray) plt.title(titles[i], size=12) plt.xticks(()) plt.yticks(()) # 在测试集的一部分上绘制预测结果图象 def title(y_pred, y_test, target_names, i): pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1] true_name = target_names[y_test[i]].rsplit(' ', 1)[-1] return 'predicted: %s\ntrue: %s' % (pred_name, true_name) prediction_titles = [ title(y_pred, y_test, target_names, i) for i in range(y_pred.shape[0]) ] plot_gallery(X_test, prediction_titles, h, w) # 画出辨识度最高的特征脸 eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])] plot_gallery(eigenfaces, eigenface_titles, h, w) plt.show()
                              'training.1600000.processed.noemoticon.csv')
    test_path = os.path.join(sentiment140_path,
                             'testdata.manual.2009.06.14.csv')

    if not os.path.exists(sentiment140_path):
        if not os.path.exists(archive_path):
            print("Downloading dataset from %s (77MB)" % SENTIMENT140_URL)
            opener = urlopen(SENTIMENT140_URL)
            open(archive_path, 'wb').write(opener.read())
        else:
            print("Found archive: " + archive_path)

        print("Extracting %s to %s" % (archive_path, sentiment140_path))
        zf = zipfile.ZipFile(archive_path)
        zf.extractall(sentiment140_path)

    print("Checking that the sentiment 140 CSV files exist...")
    assert os.path.exists(train_path)
    assert os.path.exists(test_path)
    print("=> Success!")


if __name__ == "__main__":
    datasets_folder = get_datasets_folder()
    check_sentiment140(datasets_folder)

    print("Loading Labeled Faces Data (~200MB)")
    from sklearn.datasets import fetch_lfw_people
    fetch_lfw_people(min_faces_per_person=70, resize=0.4,
                     data_home=datasets_folder)
    print("=> Success!")
""" import numpy as np from time import time import pylab as pl from sklearn.cross_validation import train_test_split from sklearn.datasets import fetch_lfw_people from sklearn.svm import SVC from sklearn import grid_search #################################################################### # Download the data (if not already on disk); load it as numpy arrays lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4, color=True, funneled=False, slice_=None, download_if_missing =True) # introspect the images arrays to find the shapes (for plotting) images = lfw_people.images / 255. n_samples, h, w, n_colors = images.shape # the label to predict is the id of the person target_names = lfw_people.target_names.tolist() #################################################################### # Pick a pair to classify such as names = ['Tony Blair', 'Colin Powell'] #names = ['Donald Rumsfeld', 'Colin Powell'] idx0 = (lfw_people.target == target_names.index(names[0]))
def Bot_image_recognized(image): print(__doc__) # Display progress logs on stdout logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') ############################################################################### # Download the data, if not already on disk and load it as numpy arrays lfw_people = fetch_lfw_people(min_faces_per_person=40, resize=0.4) # introspect the images arrays to find the shapes (for plotting) n_samples, h, w = lfw_people.images.shape #Resize image to training data set pil_im = Image.open(image) image_resized = pil_im.resize((w, h)) #image_resized=resizeimage.resize_thumbnail(pil_im, [h, w]) face = array(image_resized.convert("L"), "f") face_1D = face.ravel() print(face_1D) # for machine learning we use the 2 data directly (as relative pixel # positions info is ignored by this model) X = lfw_people.data print(X[0]) n_features = X.shape[1] # the label to predict is the id of the person y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0] print("Total dataset size:") print("n_samples: %d" % n_samples) print("n_features: %d" % n_features) print("n_classes: %d" % n_classes) ############################################################################### # Split into a training set and a test set using a stratified k fold # split into a training and testing set X_train = X print(X_train.shape) y_train = y X_test = face_1D print(X_test.shape) ############################################################################### # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled # dataset): unsupervised feature extraction / dimensionality reduction n_components = 100 print("Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0])) t0 = time() pca = PCA(n_components=n_components, svd_solver='randomized', whiten=True).fit(X_train) print("done in %0.3fs" % (time() - t0)) eigenfaces = pca.components_.reshape((n_components, h, w)) print("Projecting the input data on the eigenfaces orthonormal basis") t0 = time() X_train_pca = pca.transform(X_train) X_test_pca = pca.transform(X_test) print("done in %0.3fs" % (time() - t0)) ############################################################################### # Train a SVM classification model print("Fitting the classifier to the training set") t0 = time() param_grid = { 'C': [1e3, 5e3, 1e4, 5e4, 1e5], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid) clf = clf.fit(X_train_pca, y_train) print("done in %0.3fs" % (time() - t0)) print("Best estimator found by grid search:") print(clf.best_estimator_) ############################################################################### # Quantitative evaluation of the model quality on the test set print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca) print("done in %0.3fs" % (time() - t0)) print(target_names[y_pred]) #print(confusion_matrix(y_test, y_pred, labels=range(n_classes))) return target_names[y_pred]
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import datasets
import numpy as np
import imutils
import cv2
import sklearn

print('[INFO] fetching data...')
dataset = datasets.fetch_lfw_people(min_faces_per_person=70, funneled=True,
                                    resize=0.5)
(trainData, testData, trainLabels, testLabels) = train_test_split(
    dataset.data, dataset.target, test_size=0.25, random_state=42)

print('[INFO] training model...')
model = LogisticRegression()
model.fit(trainData, trainLabels)

print(classification_report(testLabels, model.predict(testData),
                            target_names=dataset.target_names))
import youtube_dl
import cv2
import face_recognition
import sklearn
from sklearn.datasets import fetch_lfw_people

lfw_people = fetch_lfw_people()


def process_video(vidfile):
    face_localizations = []
    face_encodings = []
    face_ids = []
    frame_num = 0

    # start processing video
    input_movie = cv2.VideoCapture(vidfile)
    length = int(input_movie.get(cv2.CAP_PROP_FRAME_COUNT))

    while True:
        ret, frame = input_movie.read()
        frame_num += 1
        if not ret:
            break  # no more frames to read

        # bgr to rgb
        rgb_frame = frame[:, :, ::-1]

        # Find all the faces and face encodings in the current frame of video
        face_locations = face_recognition.face_locations(rgb_frame, model="hog")
        face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

        if face_encodings:
            face_encodings = face_encodings[0]
            face_ids = [None for i in face_encodings]
from skimage.feature import canny
from skimage.draw import circle_perimeter
from skimage.util import img_as_ubyte

count = 0

print(__doc__)

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

###############################################################################
# Download the data, if not already on disk, and load it as numpy arrays
lfw_people = fetch_lfw_people(min_faces_per_person=70,
                              slice_=(slice(50, 140), slice(61, 189)),
                              resize=1.5)
faces = lfw_people.images

# for all faces in lfw_people
for face in faces:
    # Quick fix: processing the whole set produces far too many images to
    # count, so for simplicity this version stops after 10 images.
    if count < 10:
        image = face
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 10))
        ax1.imshow(image, cmap=plt.cm.gray)
from sklearn import datasets, model_selection
from keras import utils, models, layers, optimizers

lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70, resize=.4)
n_samples, h, w = lfw_people.images.shape

x = lfw_people.images.reshape(n_samples, h, w) / 255.0
target_names = lfw_people.target_names
n_class = len(target_names)
y = utils.to_categorical(lfw_people.target, n_class)

x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=.1, random_state=42)

sequential = models.Sequential()
sequential.add(layers.GRU(64, input_shape=(h, w), dropout=.25))
# sequential.add(layers.LSTM(64, input_shape=(h, w), dropout=.25))
# sequential.add(layers.LSTM(128, input_shape=(h, w), dropout=.25))
sequential.add(layers.Dense(n_class, activation='softmax'))
sequential.compile(optimizer=optimizers.adam(),
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])
sequential.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=120)
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
from sklearn.svm import SVC

print(__doc__)

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

###############################################################################
# Download the data, if not already on disk, and load it as numpy arrays
# ('./faces' is used as the data_home directory for the download cache)
lfw_people = fetch_lfw_people('./faces')

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the 2D data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_people.data
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

print("Total dataset size:")
def lfwTest01():
    from sklearn.datasets import fetch_lfw_people
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    for name in lfw_people.target_names:
        print(name)
from sklearn.datasets import fetch_lfw_people

logger.info("sys.version_info")
logger.info("sklearn.__version__")

import math
import numpy as np
from skimage import exposure
import scipy.misc
import caffe
import scipy.io as io

# loading data
lfw_people = fetch_lfw_people(color=True)
lfw_people_color = lfw_people
target_names = lfw_people.target_names
X, y = lfw_people.data, lfw_people.target

# this does not work, deprecated
# lfw_fea_data = io.loadmat('LFW_Feature.mat')

# read targets
target_img = "0.jpg"
image = caffe.io.load_image(target_img)
target = image

plt.figure()
plt.imshow(target)

enhanced = exposure.equalize_hist(image[50:180, 60:170])
def face_recognition_test(): print(__doc__) # Display progress logs on stdout logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') ############################################################################### # Download the data, if not already on disk and load it as numpy arrays lfw_people = fetch_lfw_people(min_faces_per_person=40, resize=0.4) # introspect the images arrays to find the shapes (for plotting) n_samples, h, w = lfw_people.images.shape # for machine learning we use the 2 data directly (as relative pixel # positions info is ignored by this model) X = lfw_people.data n_features = X.shape[1] #The label to predict is the id of the person y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0] print("Total dataset size:") print("n_samples: %d" % n_samples) print("n_features: %d" % n_features) print("n_classes: %d" % n_classes) ############################################################################### # Split into a training set and a test set using a stratified k fold # split into a training and testing set X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42) ############################################################################### # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled # dataset): unsupervised feature extraction / dimensionality reduction n_components = 100 print("Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0])) t0 = time() pca = PCA(n_components=n_components, svd_solver='randomized', whiten=True).fit(X_train) print("done in %0.3fs" % (time() - t0)) eigenfaces = pca.components_.reshape((n_components, h, w)) print("Projecting the input data on the eigenfaces orthonormal basis") t0 = time() X_train_pca = pca.transform(X_train) X_test_pca = pca.transform(X_test) print("done in %0.3fs" % (time() - t0)) ############################################################################### # Train a SVM classification model print("Fitting the classifier to the training set") t0 = time() param_grid = { 'C': [1e3, 5e3, 1e4, 5e4, 1e5], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid) clf = clf.fit(X_train_pca, y_train) print("done in %0.3fs" % (time() - t0)) print("Best estimator found by grid search:") print(clf.best_estimator_) ############################################################################### # Quantitative evaluation of the model quality on the test set print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca) print("done in %0.3fs" % (time() - t0)) acc = clf.score(X_test_pca, y_test) print(acc) print(classification_report(y_test, y_pred, target_names=target_names)) print(confusion_matrix(y_test, y_pred, labels=range(n_classes))) ############################################################################### # Qualitative evaluation of the predictions using matplotlib def plot_gallery(images, titles, h, w, n_row=3, n_col=4): """Helper function to plot a gallery of portraits""" plt.figure(figsize=(1.8 * n_col, 2.4 * n_row)) plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35) for i in range(n_row * n_col): plt.subplot(n_row, n_col, i + 1) plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray) plt.title(titles[i], size=12) plt.xticks(()) plt.yticks(()) # plot the result of the prediction on a portion of the test set def title(y_pred, y_test, target_names, i): pred_name 
= target_names[y_pred[i]].rsplit(' ', 1)[-1] true_name = target_names[y_test[i]].rsplit(' ', 1)[-1] return 'predicted: %s\ntrue: %s' % (pred_name, true_name) prediction_titles = [ title(y_pred, y_test, target_names, i) for i in range(y_pred.shape[0]) ] plot_gallery(X_test, prediction_titles, h, w) # plot the gallery of the most significative eigenfaces eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])] plot_gallery(eigenfaces, eigenface_titles, h, w) plt.show()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_lfw_people
import matplotlib.pyplot as plt

tf.logging.set_verbosity(tf.logging.INFO)

#############################################
# LOAD DATA AND SPLIT FOR TRAINING AND EVALUATING

# this produces cropped, centered 64x64 images
lfw_people = fetch_lfw_people(min_faces_per_person=70,
                              slice_=(slice(61, 189), slice(61, 189)),
                              resize=0.5, color=False)

X = lfw_people.images
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

y = np.asarray(y, dtype=np.int32)

# split into a training and testing set
# X_train, X_test, y_train, y_test = train_test_split(
train_set, eval_set, train_lbl, eval_lbl = train_test_split(
    X, y, test_size=0.25, random_state=10)
def load():
    lfw_people = fetch_lfw_people(min_faces_per_person=5, resize=1)
    return lfw_people
def test_load_empty_lfw_people():
    fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA,
                     download_if_missing=False)
from sklearn.datasets import fetch_lfw_people
import numpy as np

people = fetch_lfw_people(min_faces_per_person=20, resize=0.7,
                          download_if_missing=True)
print(people.images.shape)

# Importing required packages and utilities
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt

people.target = people.target.reshape(people.target.shape[0], 1)

# preprocessing data
raw_data = people.images.reshape(
    people.images.shape[0],
    people.images.shape[1] * people.images.shape[2])
scaler = StandardScaler()
scaled_data = scaler.fit_transform(raw_data)

# splitting data into training and testing set
from sklearn.neighbors import KNeighborsClassifier

components = []
accuracies = []
for i in range(1, scaled_data.shape[1]):
    pca = PCA(n_components=i)
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
from sklearn.svm import SVC

print(__doc__)

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

###############################################################################
# Download the data, if not already on disk, and load it as numpy arrays
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the 2D data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_people.data
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
print(y)
target_names = lfw_people.target_names
n_classes = target_names.shape[0]
def go(options): # Debugging info to see if we're using the GPU print('devices', device_lib.list_local_devices()) # These are people in the data that smile SMILING = [0, 7, 8, 11, 12, 13, 14, 20, 27, 155, 153, 154, 297] NONSMILING = [1, 2, 3, 6, 10, 60, 61, 136, 138, 216, 219, 280] # Dowload the data faces = datasets.fetch_lfw_people(data_home='.') x = faces.images # x is a 13000 by 67 by 42 array hidden_size = options.hidden # Build the encoder encoder = Sequential() encoder.add(Flatten(input_shape=(62, 47))) encoder.add(Dense(1024, activation='relu')) encoder.add(Dense(512, activation='relu')) encoder.add(Dense(256, activation='relu')) encoder.add(Dense(128, activation='relu')) encoder.add(Dense(hidden_size)) # Build the decoder decoder = Sequential() decoder.add(Dense(128, activation='relu', input_dim=hidden_size)) decoder.add(Dense(256, activation='relu')) decoder.add(Dense(512, activation='relu')) decoder.add(Dense(1024, activation='relu')) decoder.add(Dense(62 * 47, activation='relu')) decoder.add(Reshape((62, 47))) # Stick em together to make the autoencoder auto = Sequential() auto.add(encoder) auto.add(decoder) auto.summary() # Choose a loss function (MSE) and a search algorithm # (Adam, a fancy version of gradient descent) optimizer = Adam(lr=options.lr) auto.compile(optimizer=optimizer, loss='mse') # Search for a good model auto.fit(x, x, epochs=options.epochs, batch_size=256, shuffle=True, validation_split=0.1) # Select the smiling and nonsmiling images from the dataset smiling = x[SMILING, ...] nonsmiling = x[NONSMILING, ...] # Pass them through the encoder smiling_latent = encoder.predict(smiling) nonsmiling_latent = encoder.predict(nonsmiling) # Compute the means for both groups smiling_mean = smiling_latent.mean(axis=0) nonsmiling_mean = nonsmiling_latent.mean(axis=0) # Subtract for smiling vector smiling_vector = smiling_mean - nonsmiling_mean # Making somebody smile (person 42): latent = encoder.predict(x[None, 42, ...]) l_smile = latent + 0.3 * smiling_vector smiling = decoder.predict(l_smile) # Plot fronwing-to-smiling transition for several people # in a big PDF image randos = 6 k = 9 fig = plt.figure(figsize=(k, randos)) for rando in range(randos): rando_latent = encoder.predict(x[None, rando, ...]) # plot several images adds = np.linspace(-1.0, 1.0, k) for i in range(k): gen_latent = rando_latent + adds[i] * smiling_vector gen = decoder.predict(gen_latent) ax = fig.add_subplot(randos, k, rando * k + i + 1, xticks=[], yticks=[]) ax.imshow(gen.reshape((62, 47)), cmap=plt.cm.gray) plt.savefig('rando-to-smiling.pdf')
def main(): print(__doc__) # Display progress logs on stdout logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') ############################################################################### # Download the data, if not already on disk and load it as numpy arrays lfw_people = fetch_lfw_people(min_faces_per_person=100, resize=0.4) # introspect the images arrays to find the shapes (for plotting) n_samples, h, w = lfw_people.images.shape # for machine learning we use the 2 data directly (as relative pixel # positions info is ignored by this model) X = lfw_people.data n_features = X.shape[1] # the label to predict is the id of the person y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0] print("Total dataset size:") print("n_samples: %d" % n_samples) print("n_features: %d" % n_features) print("n_classes: %d" % n_classes) ############################################################################### # Split into a training set and a test set using a stratified k fold # split into a training and testing set X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.25) n_train_samples = X_train.shape[0] n_test_samples = X_test.shape[0] ############################################################################### # legacy PCA: just computes all the eigenvectors of the training data # then select eigenvectors that have the highest eigenvalues legacy_PCA_demo = False if legacy_PCA_demo: n_components = 150 print("Extracting the top %d eigenfaces from %d faces using legacy PCA" % (n_components, X_train.shape[0])) t0 = time() pca = LegacyPCA(n_components=n_components, whiten=True).fit(X_train) print("done in %0.3fs" % (time() - t0)) print("Projecting the input data on the eigenfaces orthonormal basis") t0 = time() X_train_pca_legacy = pca.transform(X_train) X_test_pca_legacy = pca.transform(X_test) print("done in %0.3fs" % (time() - t0)) print("Fitting the Prototype classifier to the training set using legacy PCA") t0 = time() clf = PrototypeClassifier().fit(X_train_pca_legacy, y_train) print("done in %0.3fs" % (time() - t0)) print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca_legacy) print("done in %0.3fs" % (time() - t0)) print(classification_report(y_test, y_pred, target_names=target_names)) print("Fitting the SVM classifier to the training set using legacy PCA") t0 = time() param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } clf = GridSearchCV(SVC(kernel='rbf', class_weight='auto'), param_grid) clf = clf.fit(X_train_pca_legacy, y_train) print("done in %0.3fs" % (time() - t0)) print("Best estimator found by grid search:") print(clf.best_estimator_) print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca_legacy) print("done in %0.3fs" % (time() - t0)) print(classification_report(y_test, y_pred, target_names=target_names)) ############################################################################## # Random PCA random_PCA_demo = True if random_PCA_demo: n_components = 150 print("Extracting the top %d eigenfaces from %d faces using random PCA" % (n_components, X_train.shape[0])) t0 = time() pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train) print("done in %0.3fs" % (time() - t0)) eigenfaces_random = pca.components_.reshape((n_components, h, w)) print("Projecting the input data on the eigenfaces orthonormal basis") t0 = time() X_train_pca_random = pca.transform(X_train) 
X_test_pca_random = pca.transform(X_test) print("done in %0.3fs" % (time() - t0)) print("Fitting the Prototype classifier to the training set using random PCA") t0 = time() clf = PrototypeClassifier().fit(X_train_pca_random, y_train) print("done in %0.3fs" % (time() - t0)) print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca_random) print("done in %0.3fs" % (time() - t0)) print(classification_report(y_test, y_pred, target_names=target_names)) print("Fitting the classifier to the training set using random PCA") t0 = time() param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } clf = GridSearchCV(SVC(kernel='rbf', class_weight='auto'), param_grid) clf = clf.fit(X_train_pca_random, y_train) print("done in %0.3fs" % (time() - t0)) print("Best estimator found by grid search:") print(clf.best_estimator_) print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca_random) print("done in %0.3fs" % (time() - t0)) print(classification_report(y_test, y_pred, target_names=target_names)) ############################################################################## # EM PCA em_PCA_demo = True if em_PCA_demo: n_components = 150 print("Extracting the top %d eigenfaces from %d faces using random PCA" % (n_components, X_train.shape[0])) t0 = time() pca = EMPCA(n_components=n_components, whiten=True).fit(X_train) print("done in %0.3fs" % (time() - t0)) eigenfaces_em = pca.components_.reshape((n_components, h, w)) print("Projecting the input data on the eigenfaces orthonormal basis") t0 = time() X_train_pca_em = pca.transform(X_train) X_test_pca_em = pca.transform(X_test) print("done in %0.3fs" % (time() - t0)) print("Fitting the classifier to the training set using EM PCA") t0 = time() param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } clf = GridSearchCV(SVC(kernel='rbf', class_weight='auto'), param_grid) clf = clf.fit(X_train_pca_em, y_train) print("done in %0.3fs" % (time() - t0)) print("Best estimator found by grid search:") print(clf.best_estimator_) print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca_em) print("done in %0.3fs" % (time() - t0)) print(classification_report(y_test, y_pred, target_names=target_names)) ############################################################################### # Classification using prototype and Euclidean metric ############################################################################### # Classification using support vector machines ############################################################################### # Qualitative evaluation of the predictions using matplotlib eigenfaces_legacy = pca.components_.reshape((n_components, h, w)) eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces_legacy.shape[0])] plot_gallery(eigenfaces_legacy, eigenface_titles, h, w)
    if not os.path.exists(archive_path):
        print("Downloading dataset from %s (84.1MB)" % IMDB_URL)
        opener = urlopen(IMDB_URL)
        open(archive_path, 'wb').write(opener.read())
    else:
        print("Found archive: " + archive_path)

    print("Extracting %s to %s" % (archive_path, imdb_path))
    tar = tarfile.open(archive_path, "r:gz")
    tar.extractall(path=imdb_path)
    tar.close()
    os.remove(archive_path)

    print("Checking that the IMDb train & test directories exist...")
    assert os.path.exists(train_path)
    assert os.path.exists(test_path)
    print("=> Success!")


if __name__ == "__main__":
    datasets_folder = get_datasets_folder()
    check_imdb(datasets_folder)

    print("\nLoading Labeled Faces Data (~200MB)")
    from sklearn.datasets import fetch_lfw_people
    fetch_lfw_people(min_faces_per_person=70, resize=0.4,
                     data_home=datasets_folder)
    print("=> Success!")
def main(argv = None): if argv is None: argv = sys.argv # cascade_path = sys.argv[1] # image_path = sys.argv[2] cascade_path = "/usr/share/OpenCV/haarcascades/haarcascade_frontalface_default.xml" # image_path = "/home/gbriones/Downloads/test2.jpg" image_path = "/home/gbriones/Downloads/tony_blair_00.jpg" result_path = sys.argv[3] if len(sys.argv) > 3 else None cascade = cv2.CascadeClassifier(cascade_path) # import pdb; pdb.set_trace() image = cv2.imread(image_path) if image is None: print("ERROR: Image did not load.") return 2 gray_image, detections = cascade_detect(cascade, image) crop_images = detections_draw(gray_image, detections) resized_image = cv2.resize(crop_images[0], (37, 50)) ############################################################################### # Download the data, if not already on disk and load it as numpy arrays lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4) # introspect the images arrays to find the shapes (for plotting) n_samples, h, w = lfw_people.images.shape # for machine learning we use the 2 data directly (as relative pixel # positions info is ignored by this model) X = lfw_people.data n_features = X.shape[1] # the label to predict is the id of the person y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0] n_components = 150 print(target_names) # import pdb; pdb.set_trace() print("Extracting the top %d eigenfaces from %d faces" % (n_components, X.shape[0])) t0 = time() pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X) print("done in %0.3fs" % (time() - t0)) eigenfaces = pca.components_.reshape((n_components, h, w)) import pdb; pdb.set_trace() print("Projecting the input data on the eigenfaces orthonormal basis") t0 = time() X_pca = pca.transform(X) X_test_pca = pca.transform([resized_image.flatten()]) print("done in %0.3fs" % (time() - t0)) ############################################################################### # Train a SVM classification model print("Fitting the classifier to the training set") t0 = time() param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } if os.path.isfile('filename.pkl'): clf = joblib.load('filename.pkl') else: clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced', probability=True), param_grid) clf = clf.fit(X_pca, y) joblib.dump(clf, 'filename.pkl') print("done in %0.3fs" % (time() - t0)) print("Best estimator found by grid search:") print(clf.best_estimator_) print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca) print("done in %0.3fs" % (time() - t0)) print(y_pred[0]) print(target_names[y_pred[0]]) print("Found {0} objects!".format(len(detections))) if result_path is None: # cv2.imshow("Objects found", resized_image) # cv2.waitKey(0) # plot_image(resized_image) images = [resized_image.flatten()] # import pdb; pdb.set_trace() titles = ["Original"] for index in range(len(y)): if y[index] == y_pred[0] and len(images) < 12: images.append(X[index]) titles.append(target_names[y[index]]) plot_gallery(images, titles, h, w) eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])] plot_gallery(eigenfaces, eigenface_titles, h, w) plt.show() else: cv2.imwrite(result_path, image)
from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_lfw_people
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
from sklearn.svm import SVC

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

###############################################################################
# Download the data, if not already on disk, and load it as numpy arrays
lfw_people = fetch_lfw_people(data_home='.', min_faces_per_person=70,
                              resize=0.4)

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the 2D data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_people.data
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

print("Total dataset size:")
"""Demo113_NMF_LFWPeople.ipynb # **Tame Your Python** """ import numpy as np import pandas as pd import matplotlib.pyplot as plt import tensorflow as tf print(tf.__version__) import matplotlib.pyplot as plt from sklearn.datasets import fetch_lfw_people # Load data dataset = fetch_lfw_people(min_faces_per_person=100) N, H, W = dataset.images.shape X = dataset.data y = dataset.target target_names = dataset.target_names print(target_names) print(dataset.images.shape) print(dataset.data.shape) print(dataset.target.shape) print(H * W) from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import time as time

# import the machine learning packages
from sklearn.datasets import fetch_lfw_people
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Download the data from the servers, or, if it is already present in the
# current working directory, load it as numpy arrays.
lfw_people = fetch_lfw_people(min_faces_per_person=70)
n_samples, h, w = lfw_people.images.shape

# load the feature matrix and its target (expected) values
X = lfw_people.data                    # Feature Vector
y = lfw_people.target                  # Target Variable
n_images = X.shape[0]                  # Number of Images
n_features = X.shape[1]                # Number of Features
person_name = lfw_people.target_names  # Name of the person in the images
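# The snippet above imports train_test_split, LogisticRegression and the
# report/confusion-matrix helpers but stops after loading the data. A minimal,
# hypothetical continuation sketch under those assumptions (variable names
# follow the snippet; the split ratio is illustrative, not the author's):
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)

clf = LogisticRegression()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred, target_names=person_name))
print(confusion_matrix(y_test, y_pred))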
def test_comp(settings, random_sid=9): import keras from keras.optimizers import SGD from keras.datasets import mnist, fashion_mnist, cifar10 from skimage import filters from keras import backend as K from keras_utils import WeightHistory as WeightHistory from keras_utils import RecordVariable, \ PrintLayerVariableStats, PrintAnyVariable, SGDwithLR, eval_Kdict, standarize_image_025 from keras_preprocessing.image import ImageDataGenerator K.clear_session() epochs = settings['Epochs'] batch_size = settings['batch_size'] sid = random_sid np.random.seed(sid) tf.random.set_random_seed(sid) tf.compat.v1.random.set_random_seed(sid) # MINIMUM SIGMA CAN EFFECT THE PERFORMANCE. # BECAUSE NEURON CAN GET SHRINK TOO MUCH IN INITIAL EPOCHS WITH LARGER GRADIENTS #, and GET STUCK! MIN_SIG = 0.01 MAX_SIG = 1.0 MIN_MU = 0.0 MAX_MU = 1.0 lr_dict = {'all': settings['lr_all']} #0.1 is default for MNIST mom_dict = {'all': 0.9} decay_dict = {'all': 0.9} clip_dict = {} for i, n in enumerate(settings['nhidden']): lr_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.01}) lr_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.01}) lr_dict.update({'focus-' + str(i + 1) + '/Weights:0': 0.1}) mom_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.9}) mom_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.9}) decay_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.5}) decay_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.9}) clip_dict.update( {'focus-' + str(i + 1) + '/Sigma:0': (MIN_SIG, MAX_SIG)}) clip_dict.update({'focus-' + str(i + 1) + '/Mu:0': (MIN_MU, MAX_MU)}) print("Loading dataset") if settings['dset'] == 'mnist': # input image dimensions img_rows, img_cols = 28, 28 # the data, split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() n_channels = 1 e_i = x_train.shape[0] // batch_size decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64') if settings['cnn_model']: decay_epochs = [e_i * 30, e_i * 100] elif settings['dset'] == 'cifar10': img_rows, img_cols = 32, 32 n_channels = 3 (x_train, y_train), (x_test, y_test) = cifar10.load_data() # works good as high as 77 for cnn-focus #decay_dict = {'all':0.9, 'focus-1/Sigma:0': 1.1,'focus-1/Mu:0':0.9, # 'focus-2/Sigma:0': 1.1,'focus-2/Mu:0': 0.9} #if cnn_model: batch_size=256 # this works better than 500 for cifar-10 e_i = x_train.shape[0] // batch_size decay_epochs = np.array([e_i * 30, e_i * 80, e_i * 120, e_i * 180], dtype='int64') #decay_epochs =np.array([e_i*10], dtype='int64') elif settings['dset'] == 'fashion': img_rows, img_cols = 28, 28 n_channels = 1 (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() e_i = x_train.shape[0] // batch_size decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64') if settings['cnn_model']: decay_dict = { 'all': 0.9, 'focus-1/Sigma:0': 0.9, 'focus-1/Mu:0': 0.9, 'focus-2/Sigma:0': 0.9, 'focus-2/Mu:0': 0.9 } decay_epochs = [e_i * 30, e_i * 100] elif settings['dset'] == 'mnist-clut': img_rows, img_cols = 60, 60 # the data, split between train and test sets folder = '/media/home/rdata/image/' data = np.load(folder + "mnist_cluttered_60x60_6distortions.npz") x_train, y_train = data['x_train'], np.argmax(data['y_train'], axis=-1) x_valid, y_valid = data['x_valid'], np.argmax(data['y_valid'], axis=-1) x_test, y_test = data['x_test'], np.argmax(data['y_test'], axis=-1) x_train = np.vstack((x_train, x_valid)) y_train = np.concatenate((y_train, y_valid)) n_channels = 1 lr_dict = {'all': 0.01} e_i = x_train.shape[0] // batch_size decay_epochs = np.array([e_i * 100, e_i * 150], 
dtype='int64') if settings['cnn_model']: decay_epochs = [e_i * 30, e_i * 100] elif settings['dset'] == 'lfw_faces': from sklearn.datasets import fetch_lfw_people lfw_people = fetch_lfw_people(min_faces_per_person=20, resize=0.4) # introspect the images arrays to find the shapes (for plotting) n_samples, img_rows, img_cols = lfw_people.images.shape n_channels = 1 X = lfw_people.data n_features = X.shape[1] # the label to predict is the id of the person y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0] print("Total dataset size:") print("n_samples: %d" % n_samples) print("n_features: %d" % n_features) print("n_classes: %d" % n_classes) from sklearn.model_selection import train_test_split #X -= X.mean() #X /= X.std() #split into a training and testing set x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42) import matplotlib.pyplot as plt plt.imshow(X[0].reshape((img_rows, img_cols))) plt.show() lr_dict = {'all': 0.001} e_i = x_train.shape[0] // batch_size decay_epochs = np.array([e_i * 50, e_i * 100, e_i * 150], dtype='int64') num_classes = np.unique(y_train).shape[0] if K.image_data_format() == 'channels_first': x_train = x_train.reshape(x_train.shape[0], n_channels, img_rows, img_cols) x_test = x_test.reshape(x_test.shape[0], n_channels, img_rows, img_cols) input_shape = (n_channels, img_rows, img_cols) else: x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, n_channels) x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, n_channels) input_shape = (img_rows, img_cols, n_channels) if settings['dset'] != 'mnist-clut': x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train, _, x_test = standarize_image_025(x_train, tst=x_test) x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, n_channels) x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, n_channels) input_shape = (img_rows, img_cols, n_channels) print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) sigma_reg = settings['focus_sigma_reg'] sigma_reg = keras.regularizers.l2( sigma_reg) if sigma_reg is not None else sigma_reg settings['focus_sigma_reg'] = sigma_reg if settings['cnn_model']: model = create_cnn_model(input_shape, num_classes, settings=settings) else: model = create_simple_model(input_shape, num_classes, settings=settings) model.summary() print(lr_dict) print(mom_dict) print(decay_dict) print(clip_dict) opt = SGDwithLR(lr_dict, mom_dict, decay_dict, clip_dict, decay_epochs) #, decay=None) model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy']) stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: '] stat_func_list = [np.max, np.mean, np.min, np.var, np.std] #callbacks = [tb] callbacks = [] if settings['neuron'] == 'focused': pr_1 = PrintLayerVariableStats("focus-1", "Weights:0", stat_func_list, stat_func_name) pr_2 = PrintLayerVariableStats("focus-1", "Sigma:0", stat_func_list, stat_func_name) pr_3 = PrintLayerVariableStats("focus-1", "Mu:0", stat_func_list, stat_func_name) rv_weights_1 = RecordVariable("focus-1", "Weights:0") rv_sigma_1 = RecordVariable("focus-1", "Sigma:0") rv_mu_1 = RecordVariable("focus-1", "Mu:0") print_lr_rates_callback = keras.callbacks.LambdaCallback( on_epoch_end=lambda epoch, logs: 
print( "iter: ", K.eval(model.optimizer.iterations), " LR RATES :", eval_Kdict(model.optimizer.lr))) callbacks += [ pr_1, pr_2, pr_3, rv_weights_1, rv_sigma_1, rv_mu_1, print_lr_rates_callback ] if not settings['augment']: print('Not using data augmentation.') history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), shuffle=True, callbacks=callbacks) else: print('Using real-time data augmentation.') # This will do preprocessing and realtime data augmentation: datagen = ImageDataGenerator( # set input mean to 0 over the dataset featurewise_center=False, # set each sample mean to 0 samplewise_center=False, # divide inputs by std of dataset featurewise_std_normalization=False, # divide each input by its std samplewise_std_normalization=False, # apply ZCA whitening zca_whitening=False, # epsilon for ZCA whitening zca_epsilon=1e-06, # randomly rotate images in the range (deg 0 to 180) rotation_range=0, # randomly shift images horizontally width_shift_range=0.1, # randomly shift images vertically height_shift_range=0.1, # set range for random shear shear_range=0., # set range for random zoom zoom_range=0., # set range for random channel shifts channel_shift_range=0., # set mode for filling points outside the input boundaries fill_mode='nearest', # value used for fill_mode = "constant" cval=0., # randomly flip images horizontal_flip=True, # randomly flip images vertical_flip=False, # set rescaling factor (applied before any other transformation) rescale=None, # set function that will be applied on each input preprocessing_function=None, # image data format, either "channels_first" or "channels_last" data_format='channels_last', # fraction of images reserved for validation (strictly between 0 and 1) validation_split=0.0) # Compute quantities required for featurewise normalization # (std, mean, and principal components if ZCA whitening is applied). datagen.fit(x_train) # Fit the model on the batches generated by datagen.flow(). history = model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), validation_data=(x_test, y_test), epochs=epochs, verbose=1, workers=4, callbacks=callbacks, steps_per_epoch=x_train.shape[0] // batch_size) score = model.evaluate(x_test, y_test, verbose=0) print('Test loss:', score[0]) print('Test accuracy:', score[1]) return score, history, model, callbacks
def lfwTest02():
    #from __future__ import print_function
    # Study the interfaces between the modules involved and the data formats they exchange.
    from time import time
    import logging
    import matplotlib.pyplot as plt

    from sklearn.cross_validation import train_test_split
    from sklearn.datasets import fetch_lfw_people
    from sklearn.grid_search import GridSearchCV
    from sklearn.metrics import classification_report
    from sklearn.metrics import confusion_matrix
    from sklearn.decomposition import RandomizedPCA
    from sklearn.svm import SVC

    # min_faces_per_person restricts which images are loaded
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    #lfw_people = fetch_lfw_people(min_faces_per_person=5, resize=0.4)

    # introspect the images arrays to find the shapes (for plotting)
    n_samples, h, w = lfw_people.images.shape
    X = lfw_people.data
    n_features = X.shape[1]

    # the label to predict is the id of the person
    y = lfw_people.target
    target_names = lfw_people.target_names
    n_classes = target_names.shape[0]
    #print target_names.shape
    #print("Total dataset size:")
    #print("n_samples: %d" % n_samples)
    #print("n_features: %d" % n_features)
    #print("n_classes: %d" % n_classes)
    #print("h: %d" % h)
    #print("w: %d" % w)
    #print lfw_people
    #print target_names

    ###############################################################################
    # Split into a training set and a test set using a stratified k fold
    # split into a training and testing set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    # X_train is a 966 x 1850 matrix
    print X_train
    print len(X_train)
    print len(X_train[0])
    #singleImage = X[1].reshape(h, w)
    ## display a single image
    #plt.imshow(singleImage, cmap = plt.cm.gray_r)
    #plt.show()

    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
    n_components = 150
    print("Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0]))
    t0 = time()
    # use PCA to extract the principal components
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
    print("done in %0.3fs" % (time() - t0))
    # pca.components_ is a 150 x 1850 matrix -- think of it as 150 vectors,
    # each a linear combination of all the original samples
    # (so the feature dimensionality does not change)
    print pca.components_
    print len(pca.components_)
    print len(pca.components_[0])
    # reshape each component vector back into an image;
    # an eigenface can be seen as a linear superposition of the original faces
    eigenfaces = pca.components_.reshape((n_components, h, w))

    print("Projecting the input data on the eigenfaces orthonormal basis")
    t0 = time()
    # project the original images into the eigenface space
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done in %0.3fs" % (time() - t0))
    #print len(eigenfaces[0])
    # X_train_pca is a 966 x 150 matrix; each of its 150 dimensions is the
    # projection of the original training vector onto the corresponding
    # eigenface, so its entries are floats, not ints
    print X_train_pca
    print len(X_train_pca)
    print len(X_train_pca[0])
    #print y_train  # y_train holds the labels, 0-6 for the seven classes
    # display an eigenface
    #plt.imshow(eigenfaces[-1], cmap = plt.cm.gray_r)
    #plt.show()

    # Train a SVM classification model
    print("Fitting the classifier to the training set")
    t0 = time()
    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    # an SVM is used for classification here, hence SVC
    clf = GridSearchCV(SVC(kernel='rbf', class_weight='auto'), param_grid)
    # train on the PCA-transformed data and y_train -- a multi-class SVM
    clf = clf.fit(X_train_pca, y_train)
    print("done in %0.3fs" % (time() - t0))
    print("Best estimator found by grid search:")
    print(clf.best_estimator_)

    # Quantitative evaluation of the model quality on the test set
    print("Predicting people's names on the test set")
    t0 = time()
    # the test data is projected into the same eigenface space before prediction
    y_pred = clf.predict(X_test_pca)
    print("done in %0.3fs" % (time() - t0))
    print(classification_report(y_test, y_pred,
                                target_names=target_names))
    print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))

    ###############################################################################
    # Qualitative evaluation of the predictions using matplotlib
    def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
        """Helper function to plot a gallery of portraits"""
        plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
        plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
        for i in range(n_row * n_col):
            plt.subplot(n_row, n_col, i + 1)
            plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
            plt.title(titles[i], size=12)
            plt.xticks(())
            plt.yticks(())

    # plot the result of the prediction on a portion of the test set
    def title(y_pred, y_test, target_names, i):
        pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1]
        true_name = target_names[y_test[i]].rsplit(' ', 1)[-1]
        return 'predicted: %s\ntrue: %s' % (pred_name, true_name)

    prediction_titles = [title(y_pred, y_test, target_names, i)
                         for i in range(y_pred.shape[0])]
    plot_gallery(X_test, prediction_titles, h, w)

    # plot the gallery of the most significant eigenfaces
    eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
    plot_gallery(eigenfaces, eigenface_titles, h, w)
    plt.show()
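The pipeline above imports modules that were removed in scikit-learn 0.20 (sklearn.cross_validation, sklearn.grid_search, RandomizedPCA). A rough modern equivalent of the same eigenfaces-then-SVM flow, sketched under the assumption of scikit-learn >= 0.20:

from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.decomposition import PCA
from sklearn.svm import SVC

lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
X, y = lfw_people.data, lfw_people.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# randomized SVD replaces the removed RandomizedPCA class
pca = PCA(n_components=150, svd_solver='randomized', whiten=True).fit(X_train)
X_train_pca, X_test_pca = pca.transform(X_train), pca.transform(X_test)

param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]}
# class_weight='auto' was renamed to 'balanced'
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf.fit(X_train_pca, y_train)
print(clf.best_estimator_)
print('test accuracy:', clf.score(X_test_pca, y_test))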
#5. Construct a one-dimensional feature vector from the information in each cell.
import numpy as np
import matplotlib.pyplot as plt
from skimage import data, color, feature
import skimage.data

image = color.rgb2gray(data.chelsea())
hog_vec, hog_vis = feature.hog(image, visualise=True)

fig, ax = plt.subplots(1, 2, figsize=(12, 6),
                       subplot_kw=dict(xticks=[], yticks=[]))
ax[0].imshow(image, cmap='gray')
ax[0].set_title('input image')
ax[1].imshow(hog_vis)
ax[1].set_title('visualization of HOG features');

# obtain a set of positive training samples
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people()
positive_patches = faces.images
positive_patches.shape

# obtain a set of negative training samples
from skimage import data, transform
imgs_to_use = ['camera', 'text', 'coins', 'moon', 'page', 'clock',
               'immunohistochemistry', 'chelsea', 'coffee', 'hubble_deep_field']
images = [color.rgb2gray(getattr(data, name)()) for name in imgs_to_use]

from sklearn.feature_extraction.image import PatchExtractor

def extract_patches(img, N, scale=1.0, patch_size=positive_patches[0].shape):
    extracted_patch_size = tuple((scale * np.array(patch_size)).astype(int))
    extractor = PatchExtractor(patch_size=extracted_patch_size,
                               max_patches=N, random_state=0)
    patches = extractor.transform(img[np.newaxis])
    if scale != 1:
        # the original snippet is cut off here; the likely intent is to
        # rescale the extracted patches back to the base patch size
        patches = np.array([transform.resize(patch, patch_size)
                            for patch in patches])
    return patches
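Following the plan in the comments above, the negative examples would then be patches sampled from the non-face images at a few scales and stacked with the positive faces into one HOG design matrix. A sketch, assuming the extract_patches helper above; the patch counts and scales are illustrative:

import numpy as np
from itertools import chain

# sample non-face patches at several scales
negative_patches = np.vstack([extract_patches(im, 1000, scale)
                              for im in images for scale in [0.5, 1.0, 2.0]])

# HOG-encode positives and negatives into a single training matrix
X_train = np.array([feature.hog(im)
                    for im in chain(positive_patches, negative_patches)])
y_train = np.zeros(X_train.shape[0])
y_train[:positive_patches.shape[0]] = 1  # label 1 = face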
fig, ax = plt.subplots(1, 2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)
for axi, C in zip(ax, [10.0, 0.1]):
    model = SVC(kernel='linear', C=C).fit(X, y)
    axi.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
    plot_svc_decision_function(model, axi)
    axi.scatter(model.support_vectors_[:, 0],
                model.support_vectors_[:, 1],
                s=300, lw=1, facecolors='none')
    axi.set_title('C = {0:.1f}'.format(C), size=14)

#%% Example: face recognition
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(min_faces_per_person=60)
print(faces.target_names)
print(faces.images.shape)

#%% Plotting a few of these faces
fig, ax = plt.subplots(3, 5)
for i, axi in enumerate(ax.flat):
    axi.imshow(faces.images[i], cmap='bone')
    axi.set(xticks=[], yticks=[],
            xlabel=faces.target_names[faces.target[i]])

#%% extract fundamental components & apply svm
from sklearn.decomposition import PCA as RandomizedPCA
from sklearn.pipeline import make_pipeline

pca = RandomizedPCA(n_components=150, whiten=True, random_state=42)
svc = SVC(kernel='rbf', class_weight='balanced')
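The snippet stops right after defining pca and svc. A sketch of how it would plausibly continue (this continuation is an assumption, not part of the source): chain the two steps into a pipeline and grid-search the SVM hyperparameters.

from sklearn.model_selection import train_test_split, GridSearchCV

model = make_pipeline(pca, svc)

Xtrain, Xtest, ytrain, ytest = train_test_split(faces.data, faces.target,
                                                random_state=42)
# step names come from the class names, hence the 'svc__' prefix
param_grid = {'svc__C': [1, 5, 10, 50],
              'svc__gamma': [0.0001, 0.0005, 0.001, 0.005]}
grid = GridSearchCV(model, param_grid)
grid.fit(Xtrain, ytrain)
print(grid.best_params_)
print('test accuracy:', grid.score(Xtest, ytest))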
def fetch_dataset():
    # Labeled Faces in the Wild data, restricted to people with at least 100 images
    dataset = fetch_lfw_people(min_faces_per_person=100)
    return dataset
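A quick look at what the returned Bunch contains; the exact shapes depend on the downloaded LFW copy, so the comments below are only indicative:

dataset = fetch_dataset()
print(dataset.images.shape)   # (n_samples, height, width)
print(dataset.data.shape)     # flattened pixel features
print(dataset.target_names)   # people with at least 100 images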
from sklearn import datasets lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70, resize=0.4, data_home='data')
print(math.acos(1 / math.sqrt(2)))
print('45 deg =', math.pi / 4)
#t=1
#alpha=beta=1/math.sqrt(2)

# What are the eigenvalues of the matrix X^T X, where X is the matrix of the scaled sample?
X_scaled.dot(np.array([1. / np.sqrt(2), 1. / np.sqrt(2)]))
print('Eigenvalues of X^T X, where X is the matrix of the scaled sample:',
      np.linalg.eig(X_scaled.T.dot(X_scaled))[0])
sing = np.linalg.eig(X_scaled.dot(X_scaled.T))[0]
print('What do the two numbers from the previous question mean? '
      'They show what share of the variance of the original data the principal components explain')

lfw_people = datasets.fetch_lfw_people(min_faces_per_person=50, resize=0.4,
                                       data_home='faces.dat')
print('%d objects, %d features, %d classes' % (lfw_people.data.shape[0],
                                               lfw_people.data.shape[1],
                                               len(lfw_people.target_names)))
#print('\nPersons:')
#for name in lfw_people.target_names:
#    print(name)

# Look at the contents of the dataset: all images are stored in the lfw_people.images array
fig = plt.figure(figsize=(8, 6))
for i in range(15):
    ax = fig.add_subplot(3, 5, i + 1, xticks=[], yticks=[])
    ax.imshow(lfw_people.images[i], cmap='gray')

# What is the minimum number of PCA components needed to explain 90% of the variance?
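One way to answer the 90%-variance question posed above is to fit a full PCA on the flattened faces and take the cumulative sum of the explained variance ratios; a short sketch, assuming scikit-learn's PCA and the lfw_people object loaded above:

from sklearn.decomposition import PCA
import numpy as np

pca = PCA().fit(lfw_people.data)
cum_var = np.cumsum(pca.explained_variance_ratio_)
n_90 = np.argmax(cum_var >= 0.9) + 1   # first index where the cumulative ratio reaches 0.9
print('components needed to explain 90% of the variance:', n_90)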
class FaceRec(ModelTrafficSign): from sklearn.datasets import fetch_lfw_people people = fetch_lfw_people(color=True, min_faces_per_person=25) NB_LABELS = len(set(people.target))
def name_title(predictions, i): pred_name = target_names[predictions[i]].rsplit(' ', 1)[-1] return pred_name def prediction_title(predictions, actual, target_names, i): pred_name = target_names[predictions[i]].rsplit(' ', 1)[-1] true_name = target_names[actual[i]].rsplit(' ', 1)[-1] return 'predicted: %s\ntrue: %s' % (pred_name, true_name) #load data and split into test/train sets, one-hot encode labels print("\nLoading LFW dataset") lfw_people = fetch_lfw_people(data_home='.cache', min_faces_per_person=70, slice_=(slice(75, 200), slice(75, 200)), resize=0.4, color=False) _, h, w = lfw_people.images.shape images = lfw_people.data labels = lfw_people.target target_names = lfw_people.target_names X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.25, random_state=42) #compute pca to extract eigenfaces pca = PCA(n_components=50, svd_solver='randomized', whiten=True).fit(X_train) eigenfaces = pca.components_.reshape((50, h, w))
data = numpy.asarray(digits.data, dtype='float32') target = numpy.asarray(digits.target, dtype='int32') nudged_x, nudged_y = nudge_dataset(data, target) if SCALE: nudged_x = preprocessing.scale(nudged_x) x_train, x_test, y_train, y_test = cross_validation.train_test_split( nudged_x, nudged_y, test_size=0.2, random_state=42) train_models(x_train, y_train, x_test, y_test, nudged_x.shape[1], len(set(target)), numpy_rng=numpy.random.RandomState(123), name='digits') if FACES: import logging logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') lfw_people = datasets.fetch_lfw_people(min_faces_per_person=50, resize=0.4) X = numpy.asarray(lfw_people.data, dtype='float32') if SCALE: X = preprocessing.scale(X) y = numpy.asarray(lfw_people.target, dtype='int32') target_names = lfw_people.target_names print("Total dataset size:") print("n samples: %d" % X.shape[0]) print("n features: %d" % X.shape[1]) print("n classes: %d" % target_names.shape[0]) x_train, x_test, y_train, y_test = cross_validation.train_test_split( X, y, test_size=0.2, random_state=42) train_models(x_train, y_train, x_test, y_test, X.shape[1], len(set(y)), numpy_rng=numpy.random.RandomState(123), name='faces')
""" Name : Roshan Zameer Syed ID: 99999-2920 Project 7 : Support vector machine for the face classification problem """ from sklearn.datasets import fetch_lfw_people import pandas as pd import numpy as np faces = fetch_lfw_people(min_faces_per_person=60) #Importing the data set with min faces = 60 n_samples, h, w = faces.images.shape print('Target names: ', faces.target_names) #Printing the target names print('Shape of the data: ', faces.images.shape) #Shape of the data X = faces.data #print(X) print(faces.data.shape) n_features = faces.data.shape[1] # features is the dimension print(n_features) y = faces.target print(y) target_names = faces.target_names n_classes = target_names.shape[0] print(n_classes) print("n_samples: %d" % n_samples) # Print number of samples print("n_features: %d" % n_features) #Print number of features print("n_classes: %d" % n_classes) #Print number of classes # Splitting the data set to training and testing data set from sklearn.model_selection import train_test_split
def test_load_fake_lfw_people_too_restrictive(): fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100, download_if_missing=False)
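As written, this test would pass even if the overly restrictive min_faces_per_person were silently ignored; the point of the test is that the call should fail. A pytest-style version that makes the expectation explicit, with the caveat that the exact exception type is an assumption here:

import pytest

def test_load_fake_lfw_people_too_restrictive():
    # assumed: fetch_lfw_people raises ValueError when no person has enough faces
    with pytest.raises(ValueError):
        fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
                         min_faces_per_person=100,
                         download_if_missing=False)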
'yticks': [] }, gridspec_kw=dict(hspace=0.1, wspace=0.1)) for i, ax in enumerate(axes.flat): ax.imshow(faces[i].reshape(62, 47), cmap='bone') plt.show() # def test_small(): # X = np.random.random(size=[5, 15]) # E = np.ones(X.shape)*1e-6 # eRPCA(X, E) if __name__ == '__main__': # test_small() faces = fetch_lfw_people() random_indexes = np.random.permutation(len(faces.data)) X = faces.data[random_indexes] # example_faces = X[:36, :] # plot_faces (example_faces) import random random.seed(2) faces2 = fetch_lfw_people(min_faces_per_person=250) random_indexes = np.random.permutation(len(faces2.data)) X = faces2.data[random_indexes] example_faces2 = X[:10, :] test = example_faces2[0] for _ in test: print(_)
# coding:utf-8 import logging from time import time from sklearn.datasets import fetch_lfw_people from sklearn.cross_validation import train_test_split from sklearn.decomposition import RandomizedPCA from sklearn.grid_search import GridSearchCV from sklearn.svm import SVC from sklearn.metrics import classification_report from sklearn.metrics import confusion_matrix logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") lfw_people = fetch_lfw_people(data_home="D:\\My documents\\code\\dataset\\", resize=0.4) n_samples, h, w = lfw_people.images.shape X = lfw_people.data n_features = X.shape[1] y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0] print ("Total dataset size:") print ("n_samples: %d" % n_samples) print ("n_features: %d" % n_features) print ("n_classes: %d" % n_classes)
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 30 10:48:20 2018
@author: lenovo
"""
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.python.framework import ops
import random
import struct
from sklearn.datasets import fetch_lfw_people
import math

# 1. Load the LFW data and crop each image to 80*80
lfw = fetch_lfw_people(data_home=None, resize=0.9)
n_samples, h, w = lfw.images.shape
lfw.images = lfw.images[0:13233, 16:96, 2:82]  # 13233 face images in total, each cropped to 80*80
choose_images_as_train = random.sample(range(0, 13233, 1), 10000)  # pick 10000 images for training, the remaining 3233 for validation
lfw_train_images = lfw.images[choose_images_as_train]
lfw_validation_images = lfw.images[np.delete(range(0, 13233, 1), choose_images_as_train)]
'''
# plot
plt.figure()
plt.imshow(lfw_train_images[0])
plt.show()
'''

# 2. Random permutation matrix
def swapRows(M, r1, r2):
    # fancy indexing copies both rows, so the swap also works in place on a
    # numpy array (plain tuple unpacking of row views would not swap correctly)
    M[[r1, r2]] = M[[r2, r1]]
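A small illustration of what the comment "random permutation matrix" suggests swapRows is for: repeatedly swapping rows of an identity matrix yields a permutation matrix. This usage is an assumption, not shown in the source:

P = np.eye(4)                      # start from the identity
for _ in range(10):
    i, j = random.sample(range(4), 2)
    swapRows(P, i, j)              # each swap keeps P a permutation matrix
print(P)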
from sklearn.cross_validation import train_test_split from sklearn.datasets import fetch_lfw_people from sklearn.grid_search import GridSearchCV from sklearn.metrics import classification_report from sklearn.metrics import confusion_matrix from sklearn.decomposition import RandomizedPCA from sklearn.svm import SVC # Display progress logs on stdout logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') ############################################################################### # Download the data, if not already on disk and load it as numpy arrays lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4) # introspect the images arrays to find the shapes (for plotting) n_samples, h, w = lfw_people.images.shape np.random.seed(42) # for machine learning we use the data directly (as relative pixel # position info is ignored by this model) X = lfw_people.data n_features = X.shape[1] # the label to predict is the id of the person y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0]
plt.legend(loc='upper right')
plt.grid()
plt.xlabel('epoch')
plt.ylabel('loss')

# pooling: a dimensionality-reduction step that passes on only the meaningful signal
# - max pooling: passes on only the strongest signal
# dropout: turns some signals off entirely

# [ Exercise - applying a deep learning model to the face-recognition data ]
# 1. ANN
# data loading
from sklearn.datasets import fetch_lfw_people
people = fetch_lfw_people(min_faces_per_person=20, resize=0.7)
people.data.shape

# down sampling: keep at most 50 images per person
v_nrow = []
for i in np.unique(people.target):
    nrow = np.where(people.target == i)[0][:50]
    v_nrow = v_nrow + list(nrow)

people_x = people.data[v_nrow]
people_y = people.target[v_nrow]

# train, test data split (the argument list is cut off in the source)
train_x, test_x, train_y, test_y = train_test_split(people_x, people_y,
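The split call above is truncated in the source. A minimal sketch of how the "1. ANN" exercise might continue, assuming a 75/25 split and a small Keras network; the layer sizes, epochs, and random_state are illustrative, not taken from the source:

from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers

# assumed completion of the truncated call above
train_x, test_x, train_y, test_y = train_test_split(people_x, people_y,
                                                    test_size=0.25,
                                                    random_state=0)

n_classes = len(np.unique(people_y))
model = keras.Sequential([
    layers.Dense(256, activation='relu', input_shape=(people_x.shape[1],)),
    layers.Dropout(0.5),
    layers.Dense(n_classes, activation='softmax')
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(train_x, train_y, epochs=20, validation_data=(test_x, test_y))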
def load_dataset(dset, normalize_data, options):
    if dset == 'mnist':
        # input image dimensions
        img_rows, img_cols = 28, 28
        # the data, split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        print(x_train.shape)
        n_channels = 1
    elif dset == 'cifar10':
        img_rows, img_cols = 32, 32
        n_channels = 3
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    elif dset == 'fashion':
        img_rows, img_cols = 28, 28
        n_channels = 1
        (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    elif dset == 'mnist-clut':
        img_rows, img_cols = 60, 60
        # the data, split between train and test sets
        #folder='/media/home/rdata/image/'
        folder = '/home/btek/datasets/image/'
        data = np.load(folder + "mnist_cluttered_60x60_6distortions.npz", allow_pickle=True)
        y_trn = data['y_train']
        y_val = data['y_valid']
        y_tst = data['y_test']
        x_train, y_train = data['x_train'], np.argmax(y_trn, axis=-1)
        x_valid, y_valid = data['x_valid'], np.argmax(y_val, axis=-1)
        x_test, y_test = data['x_test'], np.argmax(y_tst, axis=-1)
        x_train = np.vstack((x_train, x_valid))
        y_train = np.concatenate((y_train, y_valid))
        n_channels = 1
        normalize_data = False  # this dataset is already normalized
        #decay_epochs =[e_i*30, e_i*100]
    elif dset == 'lfw_faces':
        from sklearn.datasets import fetch_lfw_people
        lfw_people = fetch_lfw_people(min_faces_per_person=20, resize=0.4)
        # introspect the images arrays to find the shapes (for plotting)
        n_samples, img_rows, img_cols = lfw_people.images.shape
        n_channels = 1
        X = lfw_people.data
        n_features = X.shape[1]
        # the label to predict is the id of the person
        y = lfw_people.target
        target_names = lfw_people.target_names
        num_classes = target_names.shape[0]
        from sklearn.model_selection import train_test_split
        #X -= X.mean()
        #X /= X.std()
        # split into a training and testing set
        x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], n_channels, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], n_channels, img_rows, img_cols)
        input_shape = (n_channels, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, n_channels)
        input_shape = (img_rows, img_cols, n_channels)
    '''
    why I have written this?? BTEK
    if(n_channels==1):
        x_train = np.repeat(x_train,3, axis=3)
        x_test = np.repeat(x_test,3, axis=3)
        n_channels=3
        input_shape = (img_rows, img_cols, n_channels)
    '''
    num_classes = np.shape(np.unique(y_train))[0]
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    if normalize_data:
        # Simple norm to 0-1
        #x_train /= 255
        #x_test /= 255

        # Standard norm: mean 0, std 1, per input
        #trn_mn = np.mean(x_train, axis=0)  # this normalization is very bad for IMAGES (BTEK)
        #trn_std = np.std(x_train, axis=0)  # this normalization is very bad for IMAGES (BTEK)

        # Standard for mean 127 and std per image.
        # This does not have 0 mean but some negative value
        # Std is 1.0; some paper results were taken with this, I guess
        # trn_mn = np.mean(x_train)
        # trn_std = np.std(x_train)
        # x_train -= 127.0  # I use this because other normalizations do not create a symmetric distribution.
        # x_test -= 127.0
        # x_train /= (trn_std + 1e-7)
        # x_test /= (trn_std + 1e-7)
        # print("Data normed Mean(train):", np.mean(x_train), " Std(train):", np.std(x_train))
        # print("Data normed Mean(test):", np.mean(x_test), " Std(test):", np.std(x_test))

        # Standard for mean 127 and std per image.
        # This does not have 0 mean and std is not 1.0
        # x_train /= (255/4)
        # x_test /= (255/4)
        # x_train -= 2.0
        # x_test -= 2.0
        # print("Data normed Mean(train):", np.mean(x_train), " Std(train):", np.std(x_train))
        # print("Data normed Mean(test):", np.mean(x_test), " Std(test):", np.std(x_test))

        # CHANGING THIS aug2020 FOR FACES TEST
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255
        trn_mn = np.mean(x_train, axis=0)
        x_train -= trn_mn
        x_test -= trn_mn
        print('x_train shape:', x_train.shape)
        print("Data normed Mean(train):", np.mean(x_train), " Std(train):", np.std(x_train))
        print("Data normed Mean(test):", np.mean(x_test), " Std(test):", np.std(x_test))

        # non-zero normalization.
        # trn_mn = np.mean(x_train[np.nonzero(x_train)])
        # trn_std = np.std(x_train[np.nonzero(x_train)])
        # x_train[np.nonzero(x_train)] -= trn_mn
        # x_test[np.nonzero(x_test)] -= trn_mn
        # print("Data normed Mean(train):", np.mean(x_train), " Std(train):", np.std(x_train))
        # print("Data normed Mean(test):", np.mean(x_test), " Std(test):", np.std(x_test))
        # x_train /= (trn_std + 1e-7)
        # x_test /= (trn_std + 1e-7)
        # print("Data normed Mean(train):", np.mean(x_train), " Std(train):", np.std(x_train))
        # print("Data normed Mean(test):", np.mean(x_test), " Std(test):", np.std(x_test))

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    return x_train, y_train, x_test, y_test, input_shape, num_classes