def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):
    # Load the datasets into memory. This variant reads the train, test and
    # validation sets from separate directories instead of splitting one set.
    train_images, train_labels = load_dataset("genderdetectionfacedata/train")
    test_images, test_labels = load_dataset("genderdetectionfacedata/test")
    valid_images, valid_labels = load_dataset("genderdetectionfacedata/valid")

    self.input_shape = train_images[0].shape

    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')

    # The model uses categorical_crossentropy as its loss function, so the
    # class labels must be one-hot encoded according to nb_classes. With only
    # two classes here, each label becomes a 2-dimensional vector.
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)

    # Convert the pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')

    # Normalize each pixel value into the 0-1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255

    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
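Every variant in this section calls an external load_dataset helper that is not shown. A minimal sketch of what such a helper might look like, assuming a root directory of per-class subfolders read with OpenCV; the name load_dataset_sketch, the directory layout, and the return types are illustrative assumptions, not the original implementation:

import os
import cv2
import numpy as np

def load_dataset_sketch(path_name, image_size=64):
    # Walk path_name, read every image, and return (images, labels) arrays,
    # where the label of an image is the index of its class subfolder.
    images, labels = [], []
    class_dirs = sorted(d for d in os.listdir(path_name)
                        if os.path.isdir(os.path.join(path_name, d)))
    for label, class_dir in enumerate(class_dirs):
        for file_name in os.listdir(os.path.join(path_name, class_dir)):
            image = cv2.imread(os.path.join(path_name, class_dir, file_name))
            if image is None:
                continue  # skip unreadable files
            images.append(cv2.resize(image, (image_size, image_size)))
            labels.append(label)
    return np.array(images), np.array(labels)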
def load(self, img_rows=size, img_cols=size, img_channels=3, nb_classes=2):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.2, random_state=random.randint(0, 100))
    # Caveat: drawing the test set from a second independent split means it can
    # overlap the training set; a proper three-way split avoids this.
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.2, random_state=random.randint(0, 100))

    # If the backend's image format is 'channels_first', image data is ordered
    # channels, rows, cols; otherwise rows, cols, channels. This block reshapes
    # the datasets into the dimension order Keras expects.
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)

    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')

    # The model uses categorical_crossentropy as its loss function, so the
    # class labels must be one-hot encoded according to nb_classes. With only
    # two classes here, each label becomes a 2-dimensional vector.
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)

    # Convert the pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')

    # Normalize each pixel value into the 0-1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255

    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):
    # Load the dataset and split off 30% for validation; the test set comes
    # from a second independent split and may overlap the training data
    images, labels = load_dataset(self.path_name)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))
    # Reshape into the dimension order the Keras backend expects
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')
    # One-hot encode the labels for categorical_crossentropy
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)
    # Convert to float and normalize the pixel values into the 0-1 range
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3):
    # Load the dataset into memory; this variant has load_dataset also return
    # the number of classes
    images, labels, nb_classes = load_dataset(self.path_name)
    # Use sklearn's train_test_split to carve out the training and validation
    # sets: 30% of the data goes to validation, 70% to training
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))

    # If the image format is 'channels_first', image data is ordered
    # channels, rows, cols; otherwise rows, cols, channels.
    # This block reshapes the training data into the order Keras expects.
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)

    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')

    # The model uses categorical_crossentropy as its loss function, so the
    # class labels must be one-hot encoded according to nb_classes. With only
    # two classes here, each label becomes a 2-dimensional vector.
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)

    # Convert the pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')

    # Normalize each pixel value into the 0-1 range. Converting to float and
    # normalizing speeds up convergence, shortens training time, suits
    # activation functions whose range is (0, 1), and increases separability.
    train_images /= 255
    valid_images /= 255
    test_images /= 255

    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, model=facenet):
    # Load the dataset and map every face image to a FaceNet embedding
    images, labels = load_dataset(self.path_name)
    X_embedding = img_to_encoding(images, model)
    print('X_train shape', X_embedding.shape)
    print('y_train shape', labels.shape)
    print(X_embedding.shape[0], 'train samples')
    self.X_train = X_embedding
    self.y_train = labels
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))
    # Channel / row / column order:
    # Theano-style backends expect channels, rows, cols; TensorFlow expects rows, cols, channels
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')
    # One-hot encode the class labels; the two classes become 2-dimensional vectors
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)
    # Convert the pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize each pixel value into the 0-1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.4, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    # If the image format is 'channels_first', image data is ordered
    # channels, rows, cols; otherwise rows, cols, channels.
    # This block reshapes the training data into the order the Keras library requires.
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')
    # The model uses categorical_crossentropy as its loss function, so the
    # class labels must be one-hot encoded according to nb_classes. With only
    # two classes here, each label becomes a 2-dimensional vector.
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)
    # Convert the pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize each pixel value into the 0-1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, model=facenet):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name)
    # Generate 128-dimensional feature vectors. This should run in batches or
    # memory may run out; img_to_encoding handles that through predict's
    # batch_size parameter.
    X_embedding = img_to_encoding(images, model)
    # Print the shapes and sample count
    print('X_train shape', X_embedding.shape)
    print('y_train shape', labels.shape)
    print(X_embedding.shape[0], 'train samples')
    # No further normalization of X_train here: facenet already applies l2_norm
    self.X_train = X_embedding
    self.y_train = labels
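img_to_encoding is likewise assumed rather than shown. A minimal sketch consistent with the comments above (batching via predict's batch_size, embeddings kept unit-norm); the 0-1 scaling is an assumed preprocessing step that varies between FaceNet implementations:

import numpy as np

def img_to_encoding_sketch(images, model, batch_size=32):
    # Map a stack of face images to 128-D embeddings, predicting in batches so
    # the whole dataset never has to fit in memory at once.
    x = images.astype('float32') / 255.0  # assumed preprocessing
    embeddings = model.predict(x, batch_size=batch_size)
    # FaceNet-style models usually L2-normalize already; keep it explicit here
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    return embeddings / np.maximum(norms, 1e-10)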
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=3):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name)
    # train_test_split() returns, in order: training features (the image data
    # here), test features, training labels and test labels. It is a common
    # cross-validation utility that randomly splits the samples into
    # train_data and test_data at the given ratio.
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))
    # The backend decides the dimension order of image input to the CNN:
    # 'channels_first' (Theano-style) means channels, rows, cols; otherwise
    # rows, cols, channels (TensorFlow-style).
    # This block reshapes the training data into the order Keras expects.
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        # reshape rearranges the array dimensions
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')
    # The model uses categorical_crossentropy as its loss function, so the
    # class labels must be one-hot encoded into nb_classes-dimensional vectors
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)
    # Convert the pixel data to float so it can be normalized. This speeds up
    # convergence, shortens training time, suits activation functions whose
    # range is (0, 1), and increases separability. One especially important
    # reason to normalize is to keep feature weights comparable.
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize each pixel value into the 0-1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
2. Save the trained model when training finishes.
3. The model is saved under the model directory.
"""
if __name__ == '__main__':
    # Load the data
    path_name = r'E:\sign_system\face_asia_500_crop'
    dataset = Dataset(path_name)
    dataset.load()

    model = Model_train()
    # Train the model
    _, _, num_classes = load_dataset(path_name)
    print(num_classes)
    print('\nTrain_Starting--------------------------------------------------')
    model.Squeezenet(num_classes, dataset)
    model.train(dataset)
    print('Model Saved.')
    model.save_model(file_path='E:/sign_system/execute_system/haar_extract/squeezenetface4.h5')

    # Evaluate the model
    # model = Model_train()
    # print('\nTesting---------------------------------------------------------')
    # model.load_model(file_path=r'C:\Users\Administrator\Desktop\FaceRecognition_Version3\model\squeezenet.model.h5')
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name)
    # ///// Step 1: cross-validation style split into training, validation and
    # test sets
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))
    # If the image format is 'channels_first', image data is ordered
    # channels, rows, cols; otherwise rows, cols, channels.
    # Reshape the training data into the dimension order Keras wants.
    # print(K.image_data_format())
    # ///// Step 2: rearrange the array dimensions according to the backend
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')
    # The model uses categorical_crossentropy as its loss, so the class labels
    # must be one-hot encoded according to nb_classes; after conversion each
    # label is two-dimensional
    # ///// Step 3: one-hot encode the labels
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)
    # ///// Step 4: convert to float, then normalize. This speeds up
    # convergence, shortens training time, suits activation functions whose
    # range is (0, 1), and increases separability. Normalization also keeps
    # feature weights comparable.
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize each pixel value into the 0-1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name)
    # Split off 10% of the data, then halve that portion into validation and
    # test sets
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.1, random_state=random.randint(0, 100))
    valid_images, test_images, valid_labels, test_labels = train_test_split(
        valid_images, valid_labels, test_size=0.5, random_state=random.randint(0, 100))
    # If the image format is 'channels_first', image data is ordered
    # channels, rows, cols; otherwise rows, cols, channels.
    # This block reshapes the training data into the order Keras expects.
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')
    # The model uses categorical_crossentropy (cross-entropy loss), which
    # requires one-hot encoded labels; without an explicit class count,
    # to_categorical infers it from the labels
    train_labels = np_utils.to_categorical(train_labels)
    valid_labels = np_utils.to_categorical(valid_labels)
    test_labels = np_utils.to_categorical(test_labels)
    print(len(train_labels[0]))  # number of classes after encoding
    # Convert the pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize each pixel value into the 0-1 range. This speeds up
    # convergence, shortens training time, suits activation functions whose
    # range is (0, 1), and keeps feature weights comparable so that
    # large-valued features do not dominate the error.
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
    self.nb_classes = len(train_labels[0])
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=0):
    # Load the data into memory
    images, labels = load_dataset(self.path_name)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))
    # If the image format is 'channels_first', the input order is
    # channels, rows, cols; otherwise rows, cols, channels.
    # Reshape the data into the dimension order Keras requires.
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the numbers of training, validation and test images
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')
    # The model uses categorical_crossentropy as its loss, so the labels are
    # one-hot encoded to match the number of people being trained; nb_classes
    # is expected from the caller (with the default 0, most Keras versions of
    # to_categorical fall back to inferring the class count from the labels)
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)
    # Convert the pixel values to float
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize the values into the 0-1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
def build_model(self, dataset, path_name):
    # Determine the number of target classes
    _, _, nb_classes = load_dataset(path_name)
    # Build an empty network: a Sequential model (also called a linear stack),
    # to which the layers below are added in order, one add() per layer
    self.model = Sequential()
    # 1: 2D conv layer 1, 32 kernels of 3 x 3; padding='same' keeps the image size,
    #    so with 64 x 64 x 3 input the output is 64 x 64 x 32;
    #    params = (3 x 3 x 3 + 1) x 32 = 896
    self.model.add(
        Conv2D(32, (3, 3), padding='same', input_shape=dataset.input_shape))
    # 2: activation layer, output 64 x 64 x 32
    self.model.add(Activation('relu'))
    # 3: 2D conv layer 2, 32 kernels of 3 x 3, output 62 x 62 x 32, params 9248
    self.model.add(Conv2D(32, (3, 3)))
    # 4: activation layer, output 62 x 62 x 32
    self.model.add(Activation('relu'))
    # 5: pooling layer 1, 2 x 2 pool, output 31 x 31 x 32
    self.model.add(MaxPooling2D(pool_size=(2, 2)))
    # 6: dropout layer 1, rate 0.25, output 31 x 31 x 32
    self.model.add(Dropout(0.25))
    # 7: 2D conv layer 3, 64 kernels of 3 x 3; padding='same' keeps the size,
    #    output 31 x 31 x 64, params 18496
    self.model.add(Conv2D(64, (3, 3), padding='same'))
    # 8: activation layer, output 31 x 31 x 64
    self.model.add(Activation('relu'))
    # 9: 2D conv layer 4, 64 kernels of 3 x 3, output 29 x 29 x 64, params 36928
    self.model.add(Conv2D(64, (3, 3)))
    # 10: activation layer, output 29 x 29 x 64
    self.model.add(Activation('relu'))
    # 11: pooling layer 2, 2 x 2 pool, output 14 x 14 x 64
    self.model.add(MaxPooling2D(pool_size=(2, 2)))
    # 12: dropout layer 2, rate 0.25, output 14 x 14 x 64
    self.model.add(Dropout(0.25))
    # 13: flatten layer, output 14 x 14 x 64 = 12544
    self.model.add(Flatten())
    # 14: dense layer 1 (fully connected), output 512, params 6423040
    self.model.add(Dense(512))
    # 15: activation layer, output 512
    self.model.add(Activation('relu'))
    # 16: dropout layer 3, rate 0.5
    self.model.add(Dropout(0.5))
    # 17: dense layer 2, outputs the per-class probabilities, params 1026
    self.model.add(Dense(nb_classes))
    # 18: classification layer, produces the final result
    self.model.add(Activation('softmax'))
    # Print a model summary
    self.model.summary()
    return nb_classes
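The parameter counts quoted in the layer comments follow the usual Conv2D formula, params = (kernel_h x kernel_w x in_channels + 1) x filters. A quick check (assuming 64 x 64 x 3 inputs and nb_classes = 2, as the comments do):

def conv_params(kernel_h, kernel_w, in_channels, filters):
    # Trainable parameters of a Conv2D layer: one weight per kernel element
    # and input channel, plus one bias per filter
    return (kernel_h * kernel_w * in_channels + 1) * filters

assert conv_params(3, 3, 3, 32) == 896       # conv layer 1
assert conv_params(3, 3, 32, 32) == 9248     # conv layer 2
assert conv_params(3, 3, 32, 64) == 18496    # conv layer 3
assert conv_params(3, 3, 64, 64) == 36928    # conv layer 4
assert 14 * 14 * 64 == 12544                 # flatten output
assert 12544 * 512 + 512 == 6423040          # dense layer 1
assert 512 * 2 + 2 == 1026                   # dense layer 2 with nb_classes = 2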
def load(self, img_rows=224, img_cols=224, img_channels=3, nb_classes=111):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name)
    # Deterministic split over 777 samples: of every 7 consecutive samples,
    # index 2 goes to test, index 1 to validation, and the other five to training
    test_idx = list(range(2, 777, 7))
    valid_idx = list(range(1, 777, 7))
    train_idx = (list(range(0, 777, 7)) + list(range(3, 777, 7)) +
                 list(range(4, 777, 7)) + list(range(5, 777, 7)) +
                 list(range(6, 777, 7)))
    test_images = images[test_idx]
    valid_images = images[valid_idx]
    train_images = images[train_idx]
    # Index the labels the same way; concatenating NumPy array slices with '+'
    # would add them elementwise instead
    test_labels = labels[test_idx]
    valid_labels = labels[valid_idx]
    train_labels = labels[train_idx]
    # If the image format is 'channels_first', image data is ordered
    # channels, rows, cols; otherwise rows, cols, channels.
    # This block reshapes the training data into the order Keras expects.
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')
    # The labels are one-hot encoded with LabelBinarizer below rather than
    # np_utils.to_categorical
    # Convert the pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize each pixel value into the 0-1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    lb = LabelBinarizer().fit(np.array(range(0, nb_classes)))
    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = lb.transform(train_labels)
    self.valid_labels = lb.transform(valid_labels)
    self.test_labels = lb.transform(test_labels)
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):
    # Load the data into memory
    images, labels = load_dataset(self.path_name)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))
    # If the image format is 'channels_first', image data is ordered
    # channels, rows, cols; otherwise rows, cols, channels.
    # This block reshapes the training data into the order Keras expects.
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # The model uses categorical_crossentropy as its loss function, so the
    # class labels must be one-hot encoded according to nb_classes. With only
    # two classes here, each label becomes a 2-dimensional vector.
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)
    # Convert the pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize each pixel value into the 0-1 range
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    # Converting to float before normalizing speeds up network convergence,
    # shortens training time, suits activation functions whose range is (0, 1),
    # and increases separability. An especially important reason to normalize
    # is to keep feature weights comparable: with a loss such as mean squared
    # error, averaging a large squared difference like (5000 - 1000)^2 with a
    # small one like (3 - 1)^2 lets the large-valued feature dominate the
    # error, even though most features deserve equal weight and differ only in
    # units. Normalizing the feature data beforehand removes this imbalance.
    self.train_images = train_images
    self.train_labels = train_labels
    self.valid_images = valid_images
    self.valid_labels = valid_labels
    self.test_images = test_images
    self.test_labels = test_labels
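A quick numeric illustration of the scaling argument in the comment above, using the same made-up feature values:

import numpy as np

# Squared errors before scaling are dominated by the large-valued feature...
raw_errors = np.array([(5000 - 1000) ** 2, (3 - 1) ** 2], dtype=float)
print(raw_errors.mean())     # 8000002.0 -- the large feature dominates

# ...but after normalizing each feature to the 0-1 range, both contribute
# comparably to the average.
scaled_errors = np.array([((5000 - 1000) / 5000) ** 2, ((3 - 1) / 3) ** 2])
print(scaled_errors.mean())  # ~0.54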
# -*- coding: utf-8 -*-
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.models import load_model
from load_face_dataset import load_dataset
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
import numpy

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# Load the face dataset: the images are the input features, the class labels
# the outputs
images, labels = load_dataset('../faces')
X = images  # inputs
Y = labels  # outputs

# model = Sequential()
# model.add(Conv2D(64, (3, 3), padding='same', input_shape=(64, 64, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Conv2D(32, (3, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))
# model.add(Conv2D(32, (3, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))
# model.add(Flatten())  # 13: flatten layer
# model.add(Dense(128, activation='relu'))  # 14: dense (fully connected) layer
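A hedged sketch of how the commented-out model above could be compiled and trained once it is uncommented; the softmax output layer, optimizer settings, epoch count, batch size, and validation split are illustrative assumptions, not the original training setup:

# Assumes the commented-out Sequential model above has been uncommented.
from keras.utils import np_utils

X = images.astype('float32') / 255.0                 # scale pixels to 0-1
Y = np_utils.to_categorical(labels)                  # one-hot encode integer labels

model.add(Dense(Y.shape[1], activation='softmax'))   # output layer sized to the classes
sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)      # illustrative hyperparameters
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.fit(X, Y, validation_split=0.3, epochs=10, batch_size=32)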
import numpy as np
from keras.models import load_model
from load_face_dataset import load_dataset, resize_image, IMAGE_SIZE

model = load_model("model_weight.h5")
print(model)
images, _ = load_dataset('D:/tmp/output/')
# Reshape into the rows, cols, channels layout the network expects
image = images.reshape(images.shape[0], 64, 64, 3)
# predict_classes was removed from newer Keras; taking the argmax over the
# softmax output is the equivalent
result = np.argmax(model.predict(image), axis=1)
print('Predicted:', result)