Ejemplo n.º 1
0
def DataGen(from_path,to_path,**kws):
    '''Build the train / validation / test image generators.

    @param from_path  Source directory; images are stored in one
                      sub-directory per class name, e.g. "cat", "dog".
    @param to_path    Working directory; files taken from from_path are
                      placed in the matching sub-directories of to_path.
    ---------- keyword arguments ----------
    @param reset      Reset the dataset: delete to_path and split again.
    @param split_num  Train, valid, test split given as ratios (<1) or
                      counts (>1), e.g. "0.6,0.2,0.2" or "100,20,20".
    @param img_width  Width of the generated images.
    @param img_height Height of the generated images.
    @param batch_size Batch size handed to every generator.
    @param enhance    Whether to use data augmentation for the training set.
    @param class_mode Labelling mode: 'binary' for two classes,
                      'categorical' for multi-class.

    A keyword that is missing or explicitly None falls back to its default
    (reset=False, split_num='0.6,0.2,0.2', img_width=150, img_height=150,
    batch_size=32, enhance=False, class_mode='binary'). Every keyword that
    was passed is printed; keywords not listed above are otherwise ignored.

    @return (train_generator, valid_generator, test_generator)

    Usage:
        (train_gen,valid_gen,test_gen)=DataGen("./from","./to",reset=False,split_num="0.6,0.2,0.2",img_width=150,img_height=150,enhance=False,class_mode='binary')
        or
        (train_gen,valid_gen,test_gen)=DataGen("./from","./to",split_num="100,20,20",img_width=150,img_height=150,enhance=False)
        -------
    Data layout:
        source directory:
            ./from_path/cats/*
            ./from_path/dogs/*
        destination directory:
            ./to_path/train/cats/*
            ./to_path/train/dogs/*
            ./to_path/valid/cats/*
            ./to_path/valid/dogs/*
            ./to_path/test/cats/*
            ./to_path/test/dogs/*
    '''
    def _option(name, default):
        # A missing key and an explicit None both select the default.
        value = kws.get(name)
        return default if value is None else value

    # Resolve the keyword options.
    reset = _option('reset', False)
    split_num = _option('split_num', '0.6,0.2,0.2')
    img_width = _option('img_width', 150)
    img_height = _option('img_height', 150)
    batch_size = _option('batch_size', 32)
    enhance = _option('enhance', False)
    class_mode = _option('class_mode', 'binary')

    # Echo the options exactly as the caller passed them.
    for k, v in kws.items():
        print('%s:%s'%(k,v))

    # Reset: drop the previously split dataset so it is rebuilt below.
    if reset:
        print('delete folder:%s'%(to_path))
        if os.path.exists(to_path):
            shutil.rmtree(to_path)

    # Split the images only when the working directory does not exist yet.
    if not os.path.exists(to_path):
        print('imgages_split:%s=>%s'%(from_path,to_path))
        funs.images_split(from_path,to_path,"train,valid,test",split_num)

    # Training set: optionally augmented, always rescaled to [0, 1].
    if enhance:
        train_datagen = ImageDataGenerator(
            rescale=1./255,
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True)
    else:
        train_datagen = ImageDataGenerator(rescale=1./255)
    train_generator = train_datagen.flow_from_directory(
            # Directory holding the training split
            '%s/train'%(to_path),
            # Images are resized to (img_height, img_width)
            target_size=(img_height, img_width),
            batch_size=batch_size,
            # Label format chosen by the caller
            class_mode=class_mode)

    # Validation set: rescaling only.
    valid_datagen = ImageDataGenerator(rescale=1./255)
    valid_generator = valid_datagen.flow_from_directory(
            '%s/valid'%(to_path),
            target_size=(img_height, img_width),
            batch_size=batch_size,
            class_mode=class_mode)

    # Test set: rescaling only; the test data must not be shuffled.
    test_datagen = ImageDataGenerator(rescale=1./255)
    test_generator = test_datagen.flow_from_directory(
            '%s/test'%(to_path),
            target_size=(img_height, img_width),
            batch_size=batch_size,
            shuffle=False,
            class_mode=class_mode)

    return (train_generator,valid_generator,test_generator)
Ejemplo n.º 2
0
def DataGen(from_path,
            to_path,
            split_num,
            img_width=150,
            img_height=150,
            batch_size=32,
            enhance=False,
            class_mode='binary'):
    '''Split the images and build the train / validation / test generators.

    @param from_path  Source directory; images are stored in one
                      sub-directory per class name, e.g. "cat", "dog".
    @param to_path    Working directory; files taken from from_path are
                      placed in the matching sub-directories of to_path.
    @param split_num  Train, valid, test split given as ratios (<1) or
                      counts (>1), e.g. "0.6,0.2,0.2" or "100,20,20".
    @param img_width  Width of the generated images.
    @param img_height Height of the generated images.
    @param batch_size Batch size handed to every generator.
    @param enhance    Whether to use data augmentation for the training set.
    @param class_mode Label format passed to flow_from_directory; defaults
                      to 'binary', the value that used to be hard-coded.
    @return (train_generator, valid_generator, test_generator)

    The test generator is created with shuffle=False so that its batch
    order is stable across passes.

    Usage:
        (train_gen,valid_gen,test_gen)=DataGen("./from","./to","0.6,0.2,0.2",150,150,False)
        or
        (train_gen,valid_gen,test_gen)=DataGen("./from","./to","100,20,20",150,150,False)
        -------
        Data layout:
        source directory:
            ./from_path/cats/*
            ./from_path/dogs/*
        destination directory:
            ./to_path/train/cats/*
            ./to_path/train/dogs/*
            ./to_path/valid/cats/*
            ./to_path/valid/dogs/*
            ./to_path/test/cats/*
            ./to_path/test/dogs/*
    '''
    # Split the source images into the train/valid/test sub-directories.
    funs.images_split(from_path, to_path, "train,valid,test", split_num)

    # Only the training set is (optionally) augmented; every set is rescaled.
    if enhance:
        train_datagen = ImageDataGenerator(
            rescale=1. / 255,
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
        )
    else:
        train_datagen = ImageDataGenerator(rescale=1. / 255)

    valid_datagen = ImageDataGenerator(rescale=1. / 255)
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    # Images are resized to (img_height, img_width) by every generator.
    target_size = (img_height, img_width)

    train_generator = train_datagen.flow_from_directory(
        '%s/train' % (to_path),
        target_size=target_size,
        batch_size=batch_size,
        class_mode=class_mode)

    # Use the validation data generator here (previously test_datagen was
    # reused and valid_datagen was left unused).
    valid_generator = valid_datagen.flow_from_directory(
        '%s/valid' % (to_path),
        target_size=target_size,
        batch_size=batch_size,
        class_mode=class_mode)

    # The test data must not be shuffled.
    test_generator = test_datagen.flow_from_directory(
        '%s/test' % (to_path),
        target_size=target_size,
        batch_size=batch_size,
        shuffle=False,
        class_mode=class_mode)

    return (train_generator, valid_generator, test_generator)
Ejemplo n.º 3
0
def main(arg):
    '''Main entry point: run the image split described by ``arg``.

    ``arg`` must expose the attributes ``src``, ``dst``, ``split_class``
    and ``split_per``; they are forwarded, in that order, to
    ``funs.images_split``.
    NOTE(review): presumably ``arg`` is a parsed command-line namespace,
    ``split_class`` the subset names and ``split_per`` the split ratios or
    counts - confirm against the caller and ``funs.images_split``.
    '''
    split_images = funs.images_split
    source_dir = arg.src
    target_dir = arg.dst
    subset_names = arg.split_class
    subset_sizes = arg.split_per
    split_images(source_dir, target_dir, subset_names, subset_sizes)