Example #1
    def load(self,
             img_rows=IMAGE_SIZE,
             img_cols=IMAGE_SIZE,
             img_channels=3,
             nb_classes=2):
        # Load the dataset into memory
        #images, labels = load_dataset(self.path_name)
        #train_images, valid_images, train_labels, valid_labels = \
        #    train_test_split(images, labels, test_size=0.3, \
        #                     random_state=random.randint(0, 100))
        #print(train_images.shape)
        #print(valid_images.shape)
        train_images, train_labels = load_dataset(
            "genderdetectionfacedata/train")
        test_images, test_labels = load_dataset("genderdetectionfacedata/test")
        valid_images, valid_labels = load_dataset(
            "genderdetectionfacedata/valid")

        #print(test_images.shape)

        self.input_shape = train_images[0].shape

        # Print the number of training, validation and test samples
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # Our model uses categorical_crossentropy as its loss function, so the class labels must be
        # one-hot encoded (vectorized) according to the number of classes nb_classes. Here there are
        # only two classes, so after conversion the label data becomes two-dimensional
        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        #print(train_labels[0])
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # Convert the pixel data to float so it can be normalized
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # Normalize: scale each pixel value into the 0-1 range
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
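
The np_utils.to_categorical call above is what turns the integer labels into the two-column one-hot matrix the comment describes. A minimal sketch, assuming the classic keras.utils.np_utils API that these snippets import:

import numpy as np
from keras.utils import np_utils

labels = np.array([0, 1, 1, 0])              # integer class labels
one_hot = np_utils.to_categorical(labels, 2)
print(one_hot)                                # [[1. 0.] [0. 1.] [0. 1.] [1. 0.]]
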
    def load(self, img_rows=size, img_cols=size, img_channels=3, nb_classes=2):
        # Load the dataset into memory
        images, labels = load_dataset(self.path_name)

        train_images, valid_images, train_labels, valid_labels = train_test_split(
            images, labels, test_size=0.2, random_state=random.randint(0, 100))
        _, test_images, _, test_labels = train_test_split(
            images, labels, test_size=0.2, random_state=random.randint(0, 100))
        # If the current dimension ordering is 'th', image data is fed as channels, rows, cols; otherwise rows, cols, channels
        # This code reshapes the training data into the dimension order required by the Keras backend
        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            test_images = test_images.reshape(test_images.shape[0],
                                              img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows,
                                              img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # Print the number of training, validation and test samples
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # Our model uses categorical_crossentropy as its loss function, so the class labels must be
        # one-hot encoded according to the number of classes nb_classes; with only two classes the converted label data becomes two-dimensional
        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # Convert the pixel data to float so it can be normalized
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # Normalize: scale each pixel value into the 0-1 range
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
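
Several of the following snippets still call K.image_dim_ordering() and compare it against 'th'; that is the Keras 1 name for what Keras 2 exposes as K.image_data_format(), which returns 'channels_first' or 'channels_last'. A small compatibility sketch (which branch exists depends on the installed Keras version, so this is an assumption about the environment):

from keras import backend as K

# Prefer the Keras 2 API, fall back to the Keras 1 name if that is all there is.
if hasattr(K, 'image_data_format'):
    channels_first = K.image_data_format() == 'channels_first'
else:
    channels_first = K.image_dim_ordering() == 'th'

input_shape = (3, 64, 64) if channels_first else (64, 64, 3)
print(input_shape)
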
    def load(self,
             img_rows=IMAGE_SIZE,
             img_cols=IMAGE_SIZE,
             img_channels=3,
             nb_classes=2):

        images, labels = load_dataset(self.path_name)

        train_images, valid_images, train_labels, valid_labels = train_test_split(
            images, labels, test_size=0.3, random_state=random.randint(0, 100))
        _, test_images, _, test_labels = train_test_split(
            images, labels, test_size=0.5, random_state=random.randint(0, 100))

        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            test_images = test_images.reshape(test_images.shape[0],
                                              img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows,
                                              img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
 def load(self, img_rows = IMAGE_SIZE, img_cols = IMAGE_SIZE, 
          img_channels = 3):
     # Load the dataset into memory
     images, labels, nb_classes = load_dataset(self.path_name)
     
     # Use train_test_split() from sklearn to split the data into training and validation sets:
     # 30% of the data is held out for validation, 70% is used to train the model
     train_images, valid_images, train_labels, valid_labels = train_test_split(images,\
     labels, test_size = 0.3, random_state = random.randint(0, 100))        
     _, test_images, _, test_labels = train_test_split(images, labels, test_size = 0.5,\
     random_state = random.randint(0, 100))                
     
     # If the current dimension ordering is 'channels_first', images are fed as channels, rows, cols; otherwise rows, cols, channels
     # This code reshapes the training data into the dimension order required by the Keras library
     if K.image_data_format() == 'channels_first':
         train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
         valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
         test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
         self.input_shape = (img_channels, img_rows, img_cols)            
     else:
         train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
         valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
         test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
         self.input_shape = (img_rows, img_cols, img_channels)            
         
     # Print the number of training, validation and test samples
     print(train_images.shape[0], 'train samples')
     print(valid_images.shape[0], 'valid samples')
     print(test_images.shape[0], 'test samples')

     # Our model uses categorical_crossentropy as its loss function, so the class labels must be
     # one-hot encoded according to the number of classes nb_classes
     train_labels = np_utils.to_categorical(train_labels, nb_classes)
     valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
     test_labels = np_utils.to_categorical(test_labels, nb_classes)

     # Convert the pixel data to float so it can be normalized
     train_images = train_images.astype('float32')
     valid_images = valid_images.astype('float32')
     test_images = test_images.astype('float32')

     # Normalize each pixel value into the 0-1 range. Converting to float before normalizing
     # speeds up network convergence, shortens training time, and suits activation functions
     # whose useful range is (0, 1), increasing separability
     train_images /= 255
     valid_images /= 255
     test_images /= 255

     self.train_images = train_images
     self.valid_images = valid_images
     self.test_images  = test_images
     self.train_labels = train_labels
     self.valid_labels = valid_labels
     self.test_labels  = test_labels
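
Note that most of these examples draw the test set from a second, independent train_test_split call, so test samples can overlap with the training set. A minimal sketch of a disjoint 70/15/15 split built from two chained calls (the dummy arrays are only there to make the snippet self-contained):

import random
import numpy as np
from sklearn.model_selection import train_test_split

images = np.random.rand(100, 64, 64, 3)          # placeholder data
labels = np.random.randint(0, 2, 100)

# Split off 30% as a holdout, then split the holdout evenly into validation and test sets.
train_images, holdout_images, train_labels, holdout_labels = train_test_split(
    images, labels, test_size=0.3, random_state=random.randint(0, 100))
valid_images, test_images, valid_labels, test_labels = train_test_split(
    holdout_images, holdout_labels, test_size=0.5,
    random_state=random.randint(0, 100))
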
Example #5
 def load(self, img_rows = IMAGE_SIZE, img_cols = IMAGE_SIZE, img_channels = 3, model = facenet):
     
     images, labels = load_dataset(self.path_name)
     
     X_embedding = img_to_encoding(images, model)
    
     print('X_train shape', X_embedding.shape)
     print('y_train shape', labels.shape)
     print(X_embedding.shape[0], 'train samples')
    
     self.X_train = X_embedding
     self.y_train = labels
Example #6
    def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE,
             img_channels=3, nb_classes=2):
        # Load the dataset into memory
        images, labels = load_dataset(self.path_name)

        train_images, valid_images, train_labels, valid_labels = train_test_split(images, labels, test_size=0.3,
                                                                                  random_state=random.randint(0, 100))
        _, test_images, _, test_labels = train_test_split(images, labels, test_size=0.5,
                                                          random_state=random.randint(0, 100))

        # Channel / row / column ordering:
        # Theano backend: channels, rows, cols.  TensorFlow backend: rows, cols, channels
        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
            test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # Print the number of training, validation and test samples
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # One-hot encode the class labels to vectorize them; the two classes become a 2D array
        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # Convert the pixel data to float so it can be normalized
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # Normalize: scale each pixel value into the 0-1 range
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
    def load(self, img_rows = IMAGE_SIZE, img_cols = IMAGE_SIZE, 
             img_channels = 3, nb_classes = 2):
        # Load the dataset into memory
        images, labels = load_dataset(self.path_name)        

        train_images, valid_images, train_labels, valid_labels = train_test_split(images, labels, test_size = 0.4, random_state = random.randint(0, 100))        
        _, test_images, _, test_labels = train_test_split(images, labels, test_size = 0.3, random_state = random.randint(0, 100))                

        # If the current dimension ordering is 'th', images are fed as channels, rows, cols; otherwise rows, cols, channels
        # This code reshapes the training data into the dimension order required by the Keras library
        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
            test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)            
        else:
            train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)            

        # Print the number of training, validation and test samples
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # Our model uses categorical_crossentropy as its loss function, so the class labels must be
        # one-hot encoded according to the number of classes nb_classes; with only two classes the
        # converted label data becomes two-dimensional
        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # Convert the pixel data to float so it can be normalized
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # Normalize: scale each pixel value into the 0-1 range
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images  = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels  = test_labels
 def load(self,
          img_rows=IMAGE_SIZE,
          img_cols=IMAGE_SIZE,
          img_channels=3,
          model=facenet):
     # Load the dataset into memory
     images, labels = load_dataset(self.path_name)
     # Generate 128-dimensional feature vectors
     X_embedding = img_to_encoding(
         images, model
     )  # Consider running this in batches or memory may run out; here it is handled inside img_to_encoding via predict's batch_size parameter
     # Print the shapes of the training data
     print('X_train shape', X_embedding.shape)
     print('y_train shape', labels.shape)
     print(X_embedding.shape[0], 'train samples')
     # No further normalization of X_train here, since facenet already applies l2_norm
     self.X_train = X_embedding
     self.y_train = labels
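
img_to_encoding itself is not shown in these examples. A hypothetical sketch of what it might look like, batching the forward pass through the embedding model as the comment above suggests (the function name, the 0-1 scaling and the 128-dimensional output are assumptions inferred from the surrounding code, not the original implementation):

import numpy as np

def img_to_encoding(images, model, batch_size=32):
    # Scale pixels to 0-1 and let Keras batch the forward pass so the whole
    # dataset does not have to fit in GPU memory at once.
    images = images.astype('float32') / 255
    embeddings = model.predict(images, batch_size=batch_size)  # e.g. shape (n_samples, 128) for facenet
    return np.asarray(embeddings)
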
Example #9
    def load(self,
             img_rows=IMAGE_SIZE,
             img_cols=IMAGE_SIZE,
             img_channels=3,
             nb_classes=3):
        # Load the dataset into memory
        images, labels = load_dataset(self.path_name)

        # train_test_split() returns the datasets in the order: training features (here, the image data),
        # test features, training labels, test labels. It is commonly used for cross-validation and randomly
        # splits the samples into train and test data according to the given proportions
        train_images, valid_images, train_labels, valid_labels = train_test_split(
            images, labels, test_size=0.3, random_state=random.randint(0, 100))
        _, test_images, _, test_labels = train_test_split(
            images, labels, test_size=0.5, random_state=random.randint(0, 100))

        # The backend decides the dimension order in which image data is fed to the CNN: with 'th' (Theano)
        # the order is channels, rows, cols; otherwise (TensorFlow) it is rows, cols, channels
        # This code reshapes the training data into the dimension order required by the Keras library
        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            test_images = test_images.reshape(test_images.shape[0],
                                              img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            # reshape adjusts the array dimensions
            train_images = train_images.reshape(train_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows,
                                              img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # Print the number of training, validation and test samples
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # Our model uses categorical_crossentropy as its loss function, so the class labels must be
        # one-hot encoded according to the number of classes nb_classes; after conversion the label
        # data becomes two-dimensional

        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # Convert the pixel data to float so it can be normalized. This speeds up network convergence,
        # shortens training time and suits activation functions whose useful range is (0, 1).
        # Another important reason for normalization is to keep feature weights comparable
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # Normalize: scale each pixel value into the 0-1 range
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
   2. Save the model after training is finished
   3. The saved model goes under the model directory
"""


if __name__ == '__main__':
    
    # Load the data
    dataset = Dataset(r'E:\sign_system\face_asia_500_crop') 
    # Dataset path
    path_name = r'E:\sign_system\face_asia_500_crop'
    dataset.load()
    model = Model_train()
    
    # Train the model
    _,  _, num_classes = load_dataset(path_name) 
    print(num_classes)
    
    print('\nTrain_Starting--------------------------------------------------')
    model.Squeezenet(num_classes, dataset)
      
    model.train(dataset)
    
    print('Model Saved.')
    model.save_model(file_path = 'E:/sign_system/execute_system/haar_extract/squeezenetface4.h5')
    
   
    # Evaluate the model
#    model = Model_train()
#    print('\nTesting---------------------------------------------------------')
#    model.load_model(file_path = 'C:\Users\Administrator\Desktop\FaceRecognition_Version3\model\squeezenet.model.h5')
Example #11
    def load(self,
             img_rows=IMAGE_SIZE,
             img_cols=IMAGE_SIZE,
             img_channels=3,
             nb_classes=2):
        # Load the dataset into memory
        images, labels = load_dataset(self.path_name)

        # ///// Step 1: cross-validation style split into training, validation and test sets
        train_images, valid_images, train_labels, valid_labels = train_test_split(
            images, labels, test_size=0.3, random_state=random.randint(0, 100))
        _, test_images, _, test_labels = train_test_split(
            images, labels, test_size=0.5, random_state=random.randint(0, 100))

        # If the current dimension ordering is 'channels_first', images are fed as channels, rows,
        # cols; otherwise rows, cols, channels
        # Reshape the training data into the dimension order Keras expects
        #print(K.image_data_format())

        # ///// Step 2: reshape the arrays according to the backend in use
        if K.image_data_format() == 'channels_first':

            train_images = train_images.reshape(train_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            test_images = test_images.reshape(test_images.shape[0],
                                              img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows,
                                              img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # Print the number of training, validation and test samples
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # The model uses categorical_crossentropy as its loss function, so the class labels
        # must be one-hot encoded according to the number of classes nb_classes;
        # after conversion the label data becomes two-dimensional

        # ///// Step 3: one-hot encode the labels
        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # Convert the pixel data to float so it can be normalized

        # ///// Step 4: convert to float, then normalize. This speeds up network convergence,
        # ///// shortens training time and suits activation functions whose useful range is (0, 1).
        # An important property of normalization is that it keeps feature weights comparable
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # Normalize each pixel value into the 0-1 range
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images

        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
Example #12
    def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3):
        # Load the dataset into memory
        images, labels = load_dataset(self.path_name)

        train_images, valid_images, train_labels, valid_labels = \
            train_test_split(images, labels, test_size=0.1,random_state=random.randint(0, 100))
        #print(train_labels)
        valid_images, test_images, valid_labels, test_labels = \
            train_test_split(valid_images, valid_labels, test_size=0.5,random_state=random.randint(0, 100))

        # If the current dimension ordering is 'th', images are fed as channels, rows, cols; otherwise rows, cols, channels
        # This code reshapes the training data into the dimension order required by the Keras library
        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            test_images = test_images.reshape(test_images.shape[0],
                                              img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows,
                                              img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # Print the number of training, validation and test samples
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # Our model uses categorical_crossentropy (cross-entropy loss) as its loss function,
        # which requires the labels to be one-hot encoded,
        # so the class labels are vectorized here; without an explicit nb_classes argument
        # to_categorical infers the number of classes from the labels themselves

        train_labels = np_utils.to_categorical(train_labels)
        valid_labels = np_utils.to_categorical(valid_labels)
        test_labels = np_utils.to_categorical(test_labels)
        print(len(train_labels[1, ]))

        # Convert the pixel data to float so it can be normalized
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # Normalize: scale each pixel value into the 0-1 range.
        # This speeds up network convergence, shortens training time, suits activation functions
        # whose useful range is (0, 1), and keeps feature weights comparable so that large raw
        # values do not dominate the error
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
        self.nb_classes = len(train_labels[0])
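
When to_categorical is called without an explicit class count, as in this example, the width of the one-hot matrix is inferred from the largest label value plus one, which is why nb_classes can be read back afterwards from len(train_labels[0]). A small sketch:

import numpy as np
from keras.utils import np_utils

labels = np.array([0, 2, 1])               # highest label is 2
one_hot = np_utils.to_categorical(labels)
print(one_hot.shape)                        # (3, 3): width inferred as max(label) + 1
print(len(one_hot[0]))                      # 3, usable as nb_classes
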
    def load(self,
             img_rows=IMAGE_SIZE,
             img_cols=IMAGE_SIZE,
             img_channels=3,
             nb_classes=0):
        # Load the data into memory
        images, labels = load_dataset(self.path_name)

        train_images, valid_images, train_labels, valid_labels = train_test_split(
            images, labels, test_size=0.3, random_state=random.randint(0, 100))
        _, test_images, _, test_labels = train_test_split(
            images, labels, test_size=0.5, random_state=random.randint(0, 100))

        # If the current dimension ordering is 'th', the input order is channels, rows, cols; otherwise rows, cols, channels
        # Reshape the data into the dimension order Keras requires
        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            test_images = test_images.reshape(test_images.shape[0],
                                              img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows,
                                              img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # Print the number of training, validation and test images
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # This model uses categorical_crossentropy as its loss function, so the labels are
        # one-hot encoded according to the number of people being trained
        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # Convert the values to float
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # Normalize the values into the 0-1 range
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
    def build_model(self, dataset, path_name):

        # Determine the number of target classes
        _, _, nb_classes = load_dataset(path_name)

        # Build an empty network model. It is a linear stack to which the layers are added in
        # order; formally this is called a sequential (linearly stacked) model
        self.model = Sequential()

        # The following code adds the layers the CNN needs; each add() call adds one layer

        #1 2D convolution layer 1 with 32 3x3 kernels; padding='same' keeps the image size unchanged after convolution
        #  The input shape is the dataset's shape; the output is 64 x 64 x 32 with (3 x 3 x 3 + 1) x 32 = 896 parameters
        self.model.add(
            Conv2D(32, (3, 3), padding='same',
                   input_shape=dataset.input_shape))
        #2 Activation layer; output is 64 x 64 x 32
        self.model.add(Activation('relu'))

        #3 2D convolution layer 2 with 32 3x3 kernels; output is 62 x 62 x 32, 9248 parameters
        self.model.add(Conv2D(32, (3, 3)))

        #4 Activation layer; output is 62 x 62 x 32
        self.model.add(Activation('relu'))

        #5 Pooling layer 1 with a 2 x 2 pool; output is 31 x 31 x 32
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        #6 Dropout layer 1 with a dropout rate of 0.25; output is 31 x 31 x 32
        self.model.add(Dropout(0.25))

        #7 2D convolution layer 3 with 64 3x3 kernels; padding='same' keeps the image size unchanged
        #  Output is 31 x 31 x 64, 18496 parameters
        self.model.add(Conv2D(64, (3, 3), padding='same'))

        #8  Activation layer; output is 31 x 31 x 64
        self.model.add(Activation('relu'))

        #9  2D convolution layer 4 with 64 3x3 kernels; output is 29 x 29 x 64, 36928 parameters
        self.model.add(Conv2D(64, (3, 3)))

        #10 Activation layer; output is 29 x 29 x 64
        self.model.add(Activation('relu'))

        #11 Pooling layer 2 with a 2 x 2 pool; output is 14 x 14 x 64
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        #12 Dropout layer 2 with a dropout rate of 0.25; output is 14 x 14 x 64
        self.model.add(Dropout(0.25))

        #13 Flatten layer; output is 14 x 14 x 64 = 12544
        self.model.add(Flatten())

        #14 Dense layer 1 (also known as a fully connected layer); output is 512, 6423040 parameters
        self.model.add(Dense(512))

        #15 Activation layer; output is 512
        self.model.add(Activation('relu'))

        #16 Dropout layer 3 with a dropout rate of 0.5
        self.model.add(Dropout(0.5))

        #17 Dense layer 2; outputs the probability of each class, 1026 parameters
        self.model.add(Dense(nb_classes))

        #18 Classification layer; outputs the final result
        self.model.add(Activation('softmax'))

        # Print a summary of the model
        self.model.summary()

        return nb_classes
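
build_model only assembles the layers; it is typically followed by a training step that compiles the model and fits it on the dataset objects shown above. A minimal sketch of such a method (the optimizer and its hyperparameters are illustrative assumptions, not taken from the original project; it requires `from keras.optimizers import SGD`):

    def train(self, dataset, batch_size=20, nb_epoch=10):
        # Illustrative settings; tune for the real dataset.
        sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=sgd,
                           metrics=['accuracy'])
        self.model.fit(dataset.train_images, dataset.train_labels,
                       batch_size=batch_size,
                       epochs=nb_epoch,
                       validation_data=(dataset.valid_images, dataset.valid_labels),
                       shuffle=True)
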
Example #15
    def load(self, img_rows=224, img_cols=224, img_channels=3, nb_classes=111):
        # Load the dataset into memory
        images, labels = load_dataset(self.path_name)
        test_images = images[range(2, 777, 7)]
        valid_images = images[range(1, 777, 7)]
        train_images = images[list(range(0, 777, 7)) + list(range(3, 777, 7)) +
                              list(range(4, 777, 7)) + list(range(5, 777, 7)) +
                              list(range(6, 777, 7))]

        test_labels = labels[2:777:7]
        valid_labels = labels[1:777:7]
        train_labels = np.concatenate([labels[0:777:7], labels[3:777:7],
                                       labels[4:777:7], labels[5:777:7],
                                       labels[6:777:7]])

        #train_images, valid_images, train_labels, valid_labels = train_test_split(images, labels, test_size = 0.3, random_state = random.randint(0, 100))
        #_, test_images, _, test_labels = train_test_split(images, labels, test_size = 0.5, random_state = random.randint(0, 100))

        # If the current dimension ordering is 'th', images are fed as channels, rows, cols; otherwise rows, cols, channels
        # This code reshapes the training data into the dimension order required by the Keras library
        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            test_images = test_images.reshape(test_images.shape[0],
                                              img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows,
                                              img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # Print the number of training, validation and test samples
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # Our model uses categorical_crossentropy as its loss function, so the class labels need to be
        # one-hot encoded according to the number of classes nb_classes (done below with LabelBinarizer instead of to_categorical)
        #            train_labels = np_utils.to_categorical(train_labels, nb_classes)
        #            valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        #            test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # Convert the pixel data to float so it can be normalized
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # Normalize: scale each pixel value into the 0-1 range
        train_images /= 255
        valid_images /= 255
        test_images /= 255
        lb = LabelBinarizer().fit(np.array(range(0, nb_classes)))

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = lb.transform(train_labels)
        self.valid_labels = lb.transform(valid_labels)
        self.test_labels = lb.transform(test_labels)
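
LabelBinarizer, once fitted on the full range of class indices, produces the same kind of one-hot rows as np_utils.to_categorical; a small sketch (note that sklearn's LabelBinarizer collapses to a single column when there are only two classes):

import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer().fit(np.arange(3))    # classes 0, 1, 2
print(lb.transform([0, 2, 1]))
# [[1 0 0]
#  [0 0 1]
#  [0 1 0]]
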
Example #16
    def load(self,
             img_rows=IMAGE_SIZE,
             img_cols=IMAGE_SIZE,
             img_channels=3,
             nb_classes=2):
        # Load the data into memory
        images, labels = load_dataset(self.path_name)

        train_images,valid_images,train_labels,valid_labels  = \
            train_test_split(images,labels,test_size=0.3,random_state=random.randint(0,100))

        _, test_images, _, test_labels = train_test_split(
            images, labels, test_size=0.5, random_state=random.randint(0, 100))

        # If the current dimension ordering is 'th', images are fed as channels, rows, cols; otherwise rows, cols, channels
        # This code reshapes the training data into the dimension order required by the Keras library

        if k.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            test_images = test_images.reshape(test_images.shape[0],
                                              img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows,
                                              img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # Our model uses categorical_crossentropy as its loss function, so the class labels must be
        # one-hot encoded according to the number of classes nb_classes; with only two classes the converted label data becomes two-dimensional

        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)

        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # Convert the pixel data to float so it can be normalized
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # Normalize: scale each pixel value into the 0-1 range

        train_images /= 255
        valid_images /= 255
        test_images /= 255
        # Converting to float before normalizing speeds up network convergence, shortens training time,
        # and suits activation functions whose useful range is (0, 1), increasing separability.
        # Another important reason for normalization is to keep feature weights comparable. With a
        # mean-squared-error style loss, a large raw error such as (5000-1000)^2 averaged together with a
        # small one such as (3-1)^2 is dominated by the large value, even though in most cases the features
        # should carry equal weight and only differ because of their units. Normalizing the features
        # beforehand avoids this problem.

        self.train_images = train_images
        self.train_labels = train_labels

        self.valid_images = valid_images
        self.valid_labels = valid_labels

        self.test_images = test_images
        self.test_labels = test_labels
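
The comment above argues that unscaled features dominate a squared-error style loss; a tiny numeric sketch of that arithmetic:

import numpy as np

# Two raw errors on very different scales, as in the comment: 5000-1000 and 3-1.
errors = np.array([5000.0 - 1000.0, 3.0 - 1.0])
print(np.mean(errors ** 2))                      # 8000002.0 -- dominated by the large feature

# After scaling each feature into a comparable 0-1 range, the contributions are similar.
scaled = errors / np.array([5000.0, 3.0])
print(np.mean(scaled ** 2))                      # roughly 0.54
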
Example #17
# -*- coding: utf-8 -*-
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.models import load_model
from load_face_dataset import load_dataset
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
import numpy
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# Load the face dataset: the images are the model inputs, the labels are the targets
images, labels = load_dataset('../faces')
X = images  # inputs
Y = labels  # targets
# model = Sequential()
# model.add(Conv2D(64, (3, 3), padding='same', input_shape=(64, 64, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Conv2D(32, (3, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))

# model.add(Conv2D(32, (3, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))

# model.add(Flatten())  # 13 Flatten layer
# model.add(Dense(128, activation='relu'))  # 14 Dense layer, also known as a fully connected layer
Example #18
from keras.models import load_model

from load_face_dataset import load_dataset, resize_image, IMAGE_SIZE

model = load_model("model_weight.h5")
print(model)

images, _ = load_dataset('D:/tmp/output/')

image = images.reshape(images.shape[0], 64, 64, 3)
result = model.predict_classes(image)

print('Predicted:', result)
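
predict_classes only exists on Sequential models and was removed in later Keras releases; an equivalent sketch using predict plus argmax, reusing the model and image arrays prepared above:

import numpy as np

probabilities = model.predict(image)               # shape: (n_samples, n_classes)
predicted = np.argmax(probabilities, axis=-1)      # same result as predict_classes
print('Predicted:', predicted)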