def test_load_fake_lfw_pairs():
    """Check ``fetch_lfw_pairs`` on the local data stored under ``SCIKIT_LEARN_DATA``.

    Both calls pass ``download_if_missing=False``, so only data already
    present on disk is used. The pairs are loaded twice: once with the
    default cropping / gray-level conversion and once untouched.
    """
    lfw_pairs_train = fetch_lfw_pairs(data_home=SCIKIT_LEARN_DATA,
                                      download_if_missing=False)

    # The data is cropped around the center as a rectangular bounding box
    # around the face. Colors are converted to gray levels:
    assert lfw_pairs_train.pairs.shape == (10, 2, 62, 47)

    # the target is whether the person is the same or not
    assert_array_equal(lfw_pairs_train.target, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

    # names of the persons can be found using the target_names array
    expected_classes = ['Different persons', 'Same person']
    assert_array_equal(lfw_pairs_train.target_names, expected_classes)

    # It is possible to ask for the original data without any cropping or
    # color conversion
    lfw_pairs_train = fetch_lfw_pairs(data_home=SCIKIT_LEARN_DATA,
                                      resize=None, slice_=None, color=True,
                                      download_if_missing=False)
    assert lfw_pairs_train.pairs.shape == (10, 2, 250, 250, 3)

    # the ids and class names are the same as previously
    assert_array_equal(lfw_pairs_train.target, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    assert_array_equal(lfw_pairs_train.target_names, expected_classes)
def test_load_fake_lfw_pairs():
    """Verify shapes, targets, class names and DESCR returned by ``fetch_lfw_pairs``.

    Data is read from ``SCIKIT_LEARN_DATA`` only (``download_if_missing=False``),
    first with the default preprocessing and then with cropping, resizing and
    gray-level conversion all disabled.
    """
    local_only = dict(data_home=SCIKIT_LEARN_DATA, download_if_missing=False)
    # the target is whether the person is the same or not
    expected_target = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
    # names of the classes can be found using the target_names array
    expected_classes = ["Different persons", "Same person"]

    # Default loading: the data is cropped around the center as a rectangular
    # bounding box around the face and colors are converted to gray levels.
    cropped_gray = fetch_lfw_pairs(**local_only)
    assert cropped_gray.pairs.shape == (10, 2, 62, 47)
    assert_array_equal(cropped_gray.target, expected_target)
    assert_array_equal(cropped_gray.target_names, expected_classes)

    # It is possible to ask for the original data without any cropping or
    # color conversion.
    untouched = fetch_lfw_pairs(
        resize=None,
        slice_=None,
        color=True,
        **local_only,
    )
    assert untouched.pairs.shape == (10, 2, 250, 250, 3)

    # the ids and class names are the same as previously
    assert_array_equal(untouched.target, expected_target)
    assert_array_equal(untouched.target_names, expected_classes)

    assert untouched.DESCR.startswith(
        ".. _labeled_faces_in_the_wild_dataset:")
def LoadData(im_dim=150, im_ch=3, method='train'):
    """Fetch an LFW pairs subset, center-cropped and divided by 255.

    :param im_dim: side length of the square crop; values below 250 select a
        centered window, anything else keeps the full 0..250 range
    :param im_ch: number of channels wanted; color images are requested when
        this is greater than 1
    :param method: subset name forwarded to ``fetch_lfw_pairs``
    :return: the object returned by ``fetch_lfw_pairs`` with ``pairs`` divided
        by 255; for ``method == 'train'`` the pairs are additionally
        duplicated with the two images of each pair swapped, and ``target``
        is duplicated to match
    """
    import warnings

    from sklearn.datasets import fetch_lfw_pairs

    # Centered crop window inside the 250-pixel frame (full frame otherwise).
    if im_dim < 250:
        first = (250 - im_dim) // 2
        last = first + im_dim
    else:
        first, last = 0, 250
    window = slice(first, last, None)

    # NOTE: this silences warnings for the whole process, not just this call.
    warnings.filterwarnings("ignore")
    lfw_people = fetch_lfw_pairs(subset=method,
                                 slice_=(window, window),
                                 resize=1,
                                 color=im_ch > 1)

    # NOTE(review): assumes pixel values are in 0..255 -- confirm against the
    # installed scikit-learn version.
    lfw_people.pairs = lfw_people.pairs / 255.

    if method == 'train':
        originals = lfw_people.pairs
        # Same pairs with the two images of each pair in the opposite order.
        swapped = originals[:, ::-1, ...]
        lfw_people.pairs = np.concatenate((originals, swapped), 0)
        labels = lfw_people.target
        lfw_people.target = np.concatenate((labels, labels), 0)

    return lfw_people
def prepare_data():
    """Fetch the LFW ``train`` pairs and return ``(pairs, target)``."""
    train_bunch = fetch_lfw_pairs(subset='train')
    return train_bunch.pairs, train_bunch.target
def __init__(self, train=True, color=False, funneled=True, resize=1.0, transform=None, should_invert=True):
    """Fetch one LFW pairs subset and remember the loading options.

    :param train: load the ``'train'`` subset when true, ``'test'`` otherwise
    :param color: forwarded to ``fetch_lfw_pairs`` and stored on the instance
    :param funneled: forwarded to ``fetch_lfw_pairs``
    :param resize: forwarded to ``fetch_lfw_pairs``
    :param transform: stored as ``self.transform``
    :param should_invert: stored as ``self.should_invert``
    """
    if train:
        subset = 'train'
    else:
        subset = 'test'

    self.data = fetch_lfw_pairs(
        subset=subset,
        funneled=funneled,
        resize=resize,
        color=color,
    )
    print("Wild Faces")
    print(self.data.pairs.shape)

    # One target per pair, so this is the number of pairs.
    self.image_len = len(self.data.target)

    self.train = train
    self.transform = transform
    self.should_invert = should_invert
    self.color = color
def test_load_empty_lfw_pairs():
    """Loading from an empty data home without downloading must raise ``IOError``.

    ``download_if_missing=False`` forbids fetching the archive, so the call
    cannot succeed when ``SCIKIT_LEARN_EMPTY_DATA`` holds no LFW files. The
    expected failure is asserted explicitly instead of letting the exception
    escape and error the test.
    """
    # Local import keeps this block self-contained.
    import pytest

    with pytest.raises(IOError):
        fetch_lfw_pairs(data_home=SCIKIT_LEARN_EMPTY_DATA,
                        download_if_missing=False)
def test_load_empty_lfw_pairs():
    """Expect ``IOError`` when the data home is empty and downloading is disabled."""
    fetch_options = dict(
        data_home=SCIKIT_LEARN_EMPTY_DATA,
        download_if_missing=False,
    )
    with pytest.raises(IOError):
        fetch_lfw_pairs(**fetch_options)
# -*- encoding: utf-8 -*-

"""
8.2.1 Datasets bundled with Scikit-learn
"""

from sklearn import datasets as dss
import matplotlib.pyplot as plt

lfwp = dss.fetch_lfw_pairs()  # load the dataset

print(lfwp.keys())  # the dataset comes with several sub-collections
print(lfwp.data.shape, lfwp.data.dtype)  # "data" holds 2200 samples
print(lfwp.pairs.shape, lfwp.pairs.dtype)  # "pairs" holds 2200 samples, 2 images each
print(lfwp.target_names)  # two labels: not the same person / the same person
print(lfwp.target.shape, lfwp.target.dtype)  # labels of the 2200 samples: same person or not

# Show the two images of the first pair side by side.
for column in range(2):
    plt.subplot(1, 2, column + 1)
    plt.imshow(lfwp.pairs[0, column], cmap=plt.cm.gray)
plt.show()
# python 3.7 # Scikit-learn ver. 0.23.2 import sklearn from sklearn import datasets from sklearn import preprocessing from sklearn.preprocessing import scale from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split from sklearn import metrics # matplotlib 3.3.1 from matplotlib import pyplot irises = datasets.load_iris() wine = datasets.load_wine() facePairs = datasets.fetch_lfw_pairs() breastCancer = datasets.load_breast_cancer() covertypes = datasets.fetch_covtype() diabetes = datasets.load_diabetes() olvettiFaces = datasets.fetch_olivetti_faces() # change this to test different datasets dataset = wine dimensionality = 13 X = dataset.data.reshape(len(dataset.data), dimensionality) Y = dataset.target scaled = preprocessing.MinMaxScaler() scaled_data = scaled.fit_transform(X) trainX, testX, trainY, testY = train_test_split(X,
def __init__(self, batch_size=64, resize=0.3, validation_per=0.1, epochs=10, iterations=5):
    """
    Class that handles creating a new model, retrieving the dataset (train and test),
    training and testing the model.

    Construction does all of the work: the LFW train pairs are fetched, a
    two-input model sharing one convolutional network is built and compiled,
    ``train_network`` is run, and the model is finally evaluated on the LFW
    test pairs.

    :param batch_size: choose the size of batch for the training process
    :param resize: resize the images (float between 0 and 1)
    :param validation_per: percent of the validation set
    :param epochs: number of epoch for each iteration
    :param iterations: number of iterations
    """
    self.lfw_pairs_train = fetch_lfw_pairs(subset='train', funneled=False, resize=resize,
                                           slice_=None, color=False)
    self.lfw_pairs_train.pairs = self.lfw_pairs_train.pairs / 255
    # Side length of the resized images, read from axis 2 of the pairs array.
    # NOTE(review): the (shape, shape, 1) input below assumes square images -- confirm.
    shape = self.lfw_pairs_train.pairs.shape[2]

    K.clear_session()

    # l2-regularization penalization for each layer
    self.l2_penalization = self.def_penalization()
    self.input_shape = (shape, shape, 1)
    convolutional_net = self.create_conv_net()

    # Now the pairs of images: both inputs go through the same network.
    input_image_1 = Input(self.input_shape)
    input_image_2 = Input(self.input_shape)

    encoded_image_1 = convolutional_net(input_image_1)
    encoded_image_2 = convolutional_net(input_image_2)

    # The model output is the distance between the two encodings.
    distance = Lambda(euclidean_distance,
                      output_shape=eucl_dist_output_shape)([encoded_image_1, encoded_image_2])

    self.model = Model([input_image_1, input_image_2], distance)

    self.model.compile(loss=contrastive_loss, optimizer=RMSprop(),
                       metrics=['binary_accuracy'])

    self.train_network(batch_size, epochs, iterations, validation_per)

    # Fetch the test pairs at the same pixel size as the training images:
    # the resize factor is derived from the training side length over 250.
    lfw_pairs_test = fetch_lfw_pairs(subset='test', funneled=False, resize=shape / 250,
                                     slice_=None, color=False)
    lfw_pairs_test.pairs = lfw_pairs_test.pairs / 255
    # valid=0: no validation part is wanted, so the last two results are ignored.
    x_test, y_test, _, _ = train_validation_pairs(lfw_pairs_test, valid=0)

    self.model.evaluate(x_test, y_test)
def load_data():
    """Return ``(train_data, test_data, train_target, test_target)`` for LFW pairs.

    The ``train`` subset is fetched first, then the ``test`` subset.
    """
    subsets = {name: fetch_lfw_pairs(subset=name) for name in ('train', 'test')}
    train_bunch = subsets['train']
    test_bunch = subsets['test']
    return (
        train_bunch.data,
        test_bunch.data,
        train_bunch.target,
        test_bunch.target,
    )
def get_dataset(self):
    """Return the result of ``fetch_lfw_pairs()`` called with its default arguments."""
    dataset = fetch_lfw_pairs()
    return dataset
def __init__(self):
    """Load the color LFW ``test`` pairs with ``resize=1`` into ``self.flp``."""
    fetch_options = {
        'subset': 'test',
        'color': True,
        # per the original note: turns the inputs into (125, 94) from (62, 47)
        'resize': 1,
    }
    self.flp = fetch_lfw_pairs(**fetch_options)