def DataGen(from_path, to_path, **kws):
    '''Build Keras directory generators for the train/valid/test subsets.

    Images under ``from_path`` are split into ``to_path/train``,
    ``to_path/valid`` and ``to_path/test`` by ``funs.images_split`` (only
    when ``to_path`` does not exist yet), and one generator is created per
    subset.

    NOTE(review): another ``DataGen`` with a positional signature appears
    later in this file; if both live in the same module the later
    definition replaces this one -- confirm which is intended.

    @param from_path   source directory; images are stored in one
                       sub-directory per class name, e.g. "cat", "dog"
    @param to_path     working directory that receives the split dataset
    ---------- keyword arguments ----------
    @param reset       delete ``to_path`` and split again (default False)
    @param split_num   train,valid,test split given as ratios (<1) or
                       counts (>1), e.g. "0.6,0.2,0.2" or "100,20,20"
                       (default "0.6,0.2,0.2")
    @param img_width   width of the generated images (default 150)
    @param img_height  height of the generated images (default 150)
    @param batch_size  batch size of every generator (default 32)
    @param enhance     apply data augmentation to the training set
                       (default False)
    @param class_mode  'binary' for two classes, 'categorical' for
                       multi-class (default 'binary')
    @return tuple ``(train_generator, valid_generator, test_generator)``

    A keyword passed explicitly as ``None`` falls back to its default.

    Usage:
        (train_gen,valid_gen,test_gen)=DataGen("./from","./to",reset=False,split_num="0.6,0.2,0.2",img_width=150,img_height=150,enhance=False,class_mode='binary')
    or
        (train_gen,valid_gen,test_gen)=DataGen("./from","./to",split_num="100,20,20",img_width=150,img_height=150,enhance=False)

    Directory layout:
        source:
            ./from_path/cats/*
            ./from_path/dogs/*
        destination:
            ./to_path/train/cats/*
            ./to_path/train/dogs/*
            ./to_path/valid/cats/*
            ./to_path/valid/dogs/*
            ./to_path/test/cats/*
            ./to_path/test/dogs/*
    '''
    def _option(name, default):
        # A missing key and an explicit None both select the default.
        value = kws.get(name)
        return default if value is None else value

    # Resolve keyword options.
    reset = _option('reset', False)
    split_num = _option('split_num', '0.6,0.2,0.2')
    img_width = _option('img_width', 150)
    img_height = _option('img_height', 150)
    batch_size = _option('batch_size', 32)
    enhance = _option('enhance', False)
    class_mode = _option('class_mode', 'binary')

    # Echo the options exactly as the caller supplied them.
    for k, v in kws.items():
        print('%s:%s' % (k, v))

    # Drop the previous split so that it is rebuilt below.
    if reset:
        print('delete folder:%s' % (to_path))
        if os.path.exists(to_path):
            shutil.rmtree(to_path)

    # Split the images only when no split exists yet.
    if not os.path.exists(to_path):
        print('imgages_split:%s=>%s' % (from_path, to_path))
        funs.images_split(from_path, to_path, "train,valid,test", split_num)

    # Training generator: augmentation is applied to this subset only.
    if enhance:
        train_datagen = ImageDataGenerator(
            rescale=1./255,
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True)
    else:
        train_datagen = ImageDataGenerator(rescale=1./255)
    train_generator = train_datagen.flow_from_directory(
        '%s/train' % (to_path),
        # Keras expects target_size as (height, width).
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=class_mode)

    # Validation generator.
    valid_datagen = ImageDataGenerator(rescale=1./255)
    valid_generator = valid_datagen.flow_from_directory(
        '%s/valid' % (to_path),
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=class_mode)

    # Test generator: the test set must not be shuffled.
    test_datagen = ImageDataGenerator(rescale=1./255)
    test_generator = test_datagen.flow_from_directory(
        '%s/test' % (to_path),
        target_size=(img_height, img_width),
        batch_size=batch_size,
        shuffle=False,
        class_mode=class_mode)

    return (train_generator, valid_generator, test_generator)
def DataGen(from_path, to_path, split_num, img_width=150, img_height=150,
            batch_size=32, enhance=False, class_mode='binary'):
    '''Split an image dataset and build train/valid/test generators.

    ``funs.images_split`` is called on every invocation to distribute the
    images of ``from_path`` into ``to_path/train``, ``to_path/valid`` and
    ``to_path/test``; one Keras directory generator is then created per
    subset.

    NOTE(review): another ``DataGen`` taking ``**kws`` appears earlier in
    this file; if both live in the same module this later definition
    replaces it -- confirm which is intended.

    @param from_path   source directory; images are stored in one
                       sub-directory per class name, e.g. "cat", "dog"
    @param to_path     working directory that receives the split dataset
    @param split_num   train,valid,test split given as ratios (<1) or
                       counts (>1), e.g. "0.6,0.2,0.2" or "100,20,20"
    @param img_width   width of the generated images
    @param img_height  height of the generated images
    @param batch_size  batch size of every generator
    @param enhance     apply data augmentation to the training set
    @param class_mode  label mode forwarded to ``flow_from_directory``;
                       the default 'binary' keeps the previous behaviour
    @return tuple ``(train_generator, valid_generator, test_generator)``

    Usage:
        (train_gen,valid_gen,test_gen)=DataGen("./from","./to","0.6,0.2,0.2",150,150,False)
    or
        (train_gen,valid_gen,test_gen)=DataGen("./from","./to","100,20,20",150,150,False)

    Directory layout:
        source:
            ./from_path/cats/*
            ./from_path/dogs/*
        destination:
            ./to_path/train/cats/*
            ./to_path/train/dogs/*
            ./to_path/valid/cats/*
            ./to_path/valid/dogs/*
            ./to_path/test/cats/*
            ./to_path/test/dogs/*
    '''
    # Split the images into the three subsets.
    funs.images_split(from_path, to_path, "train,valid,test", split_num)

    # Only the training subset is augmented; the others are just rescaled.
    if enhance:
        train_datagen = ImageDataGenerator(
            rescale=1. / 255,
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
        )
    else:
        train_datagen = ImageDataGenerator(rescale=1. / 255)
    valid_datagen = ImageDataGenerator(rescale=1. / 255)
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    # Keras expects target_size as (height, width).
    target_size = (img_height, img_width)

    train_generator = train_datagen.flow_from_directory(
        '%s/train' % (to_path),
        target_size=target_size,
        batch_size=batch_size,
        class_mode=class_mode)

    # Use the validation data generator (previously test_datagen was reused
    # here and valid_datagen was left unused).
    valid_generator = valid_datagen.flow_from_directory(
        '%s/valid' % (to_path),
        target_size=target_size,
        batch_size=batch_size,
        class_mode=class_mode)

    # Keep the test set in directory order so that predictions line up with
    # the generator's class labels.
    test_generator = test_datagen.flow_from_directory(
        '%s/test' % (to_path),
        target_size=target_size,
        batch_size=batch_size,
        shuffle=False,
        class_mode=class_mode)

    return (train_generator, valid_generator, test_generator)
def main(arg):
    '''Entry point: run the image split described by ``arg``.

    @param arg  object exposing ``src``, ``dst``, ``split_class`` and
                ``split_per``; the four values are forwarded, in that
                order, to ``funs.images_split``
    '''
    split_images = funs.images_split
    source_dir, target_dir = arg.src, arg.dst
    subset_names, subset_sizes = arg.split_class, arg.split_per
    split_images(source_dir, target_dir, subset_names, subset_sizes)