Beispiel #1
0
def train(model_name, index_dir, train_config_fp, resume_from_checkpoint,
          pretrained_model_fp):
    """Build the data module, model and trainer, then run training.

    Args:
        model_name: Name of the model to train. It is validated with
            ``check_model_name`` and passed to ``gen_model``.
        index_dir: Directory handed to ``OcrDataModule`` as ``index_dir``.
        train_config_fp: Path of a JSON training configuration file. The keys
            ``vocab_fp``, ``img_folder``, ``batch_size``, ``num_workers`` and
            ``pin_memory`` are read here; the whole dict is also passed to
            ``PlTrainer``.
        resume_from_checkpoint: Forwarded unchanged to ``trainer.fit``.
        pretrained_model_fp: If not ``None``, parameters are loaded into the
            freshly built model from this path before training starts.

    Returns:
        None.
    """
    check_model_name(model_name)

    # Training-time augmentation pipeline. The commented-out entries are
    # alternative augmentations that are currently disabled.
    train_transform = T.Compose([
        RandomStretchAug(min_ratio=0.5, max_ratio=1.5),
        # RandomCrop((8, 10)),
        T.RandomInvert(p=0.2),
        T.RandomApply([T.RandomRotation(degrees=1)], p=0.4),
        # T.RandomAutocontrast(p=0.05),
        # T.RandomPosterize(bits=4, p=0.3),
        # T.RandomAdjustSharpness(sharpness_factor=0.5, p=0.3),
        # T.RandomEqualize(p=0.3),
        # T.RandomApply([T.GaussianBlur(kernel_size=3)], p=0.5),
        NormalizeAug(),
        # RandomPaddingAug(p=0.5, max_pad_len=72),
    ])
    # Validation samples are only normalized, never randomly augmented.
    val_transform = NormalizeAug()

    # Use a context manager so the config file handle is closed right after
    # parsing instead of being left to the garbage collector.
    with open(train_config_fp) as config_file:
        train_config = json.load(config_file)

    data_mod = OcrDataModule(
        index_dir=index_dir,
        vocab_fp=train_config['vocab_fp'],
        img_folder=train_config['img_folder'],
        train_transforms=train_transform,
        val_transforms=val_transform,
        batch_size=train_config['batch_size'],
        num_workers=train_config['num_workers'],
        pin_memory=train_config['pin_memory'],
    )

    # Debug aid (disabled): dump a few transformed samples for visual checks.
    # train_ds = data_mod.train
    # for i in range(min(100, len(train_ds))):
    #     visualize_example(train_transform(train_ds[i][0]), 'debugs/train-1-%d' % i)
    #     visualize_example(train_transform(train_ds[i][0]), 'debugs/train-2-%d' % i)
    #     visualize_example(train_transform(train_ds[i][0]), 'debugs/train-3-%d' % i)
    # val_ds = data_mod.val
    # for i in range(min(10, len(val_ds))):
    #     visualize_example(val_transform(val_ds[i][0]), 'debugs/val-1-%d' % i)
    #     visualize_example(val_transform(val_ds[i][0]), 'debugs/val-2-%d' % i)
    #     visualize_example(val_transform(val_ds[i][0]), 'debugs/val-3-%d' % i)
    # return

    trainer = PlTrainer(train_config,
                        ckpt_fn=['cnocr',
                                 'v%s' % MODEL_VERSION, model_name])
    # The model is built from the vocabulary exposed by the data module.
    model = gen_model(model_name, data_mod.vocab)
    logger.info(model)

    if pretrained_model_fp is not None:
        load_model_params(model, pretrained_model_fp)

    trainer.fit(model,
                datamodule=data_mod,
                resume_from_checkpoint=resume_from_checkpoint)
# ~~~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomResizedCrop` transform
# (see also :func:`~torchvision.transforms.functional.resized_crop`)
# crops an image at a random location, and then resizes the crop to a given
# size.
resize_cropper = T.RandomResizedCrop(size=(32, 32))  # every crop is resized to 32x32
resized_crops = [resize_cropper(orig_img) for _ in range(4)]  # four separate calls on the same image
plot(resized_crops)  # `plot` and `orig_img` are defined outside this excerpt

####################################
# RandomInvert
# ~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomInvert` transform
# (see also :func:`~torchvision.transforms.functional.invert`)
# randomly inverts the colors of the given image.
inverter = T.RandomInvert()  # no explicit p is passed, so the library default applies
invertered_imgs = [inverter(orig_img) for _ in range(4)]  # four separate calls on the same image
plot(invertered_imgs)

####################################
# RandomPosterize
# ~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomPosterize` transform
# (see also :func:`~torchvision.transforms.functional.posterize`)
# randomly posterizes the image by reducing the number of bits
# of each color channel.
posterizer = T.RandomPosterize(bits=2)  # presumably keeps 2 bits per channel; confirm in the torchvision docs
posterized_imgs = [posterizer(orig_img) for _ in range(4)]  # four separate calls on the same image
plot(posterized_imgs)

####################################
Beispiel #3
0
# Make both the parent directory and this file's own directory importable,
# in that priority order, ahead of anything already on sys.path.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(_THIS_DIR))
sys.path.insert(1, _THIS_DIR)

# Fixture locations, all resolved relative to the directory two levels above
# this file.
_BASE_DIR = Path(__file__).parent.parent
EXAMPLE_DIR = _BASE_DIR / 'docs/examples'
INDEX_DIR = _BASE_DIR / 'data/test'
IMAGE_DIR = _BASE_DIR / 'data/images'

from cnocr import gen_model
from cnocr.data_utils.aug import NormalizeAug
from cnocr.dataset import OcrDataModule
from cnocr.trainer import PlTrainer

# Random augmentations applied to training samples, in order; NormalizeAug
# runs last.
_train_steps = [
    transforms.RandomInvert(p=0.5),
    transforms.RandomErasing(p=0.05),
    transforms.RandomRotation(degrees=2),
    transforms.RandomAutocontrast(p=0.05),
    NormalizeAug(),
]
train_transform = transforms.Compose(_train_steps)

# Validation samples only go through NormalizeAug.
val_transform = NormalizeAug()


def test_trainer():
    data_mod = OcrDataModule(
        index_dir=INDEX_DIR,
        vocab_fp=EXAMPLE_DIR / 'label_cn.txt',
        img_folder=IMAGE_DIR,
        train_transforms=train_transform,
Beispiel #4
0
def data_augmentation(ToTensor=False,
                      Resize=None,
                      Contrast=None,
                      Equalize=None,
                      HFlip=None,
                      Invert=None,
                      VFlip=None,
                      Rotation=None,
                      Grayscale=None,
                      Perspective=None,
                      Erasing=None,
                      Crop=None):
    """Assemble a ``transforms.Compose`` pipeline from the enabled options.

    DataAugmentation 2021/03/23 by Mr.w

    Every option left at its default is skipped. Enabled transforms are
    always appended in the order the parameters are listed below, regardless
    of the order in which keyword arguments are passed.

    Args:
        ToTensor: ``True`` to start the pipeline with ``ToTensor``. Note that
            converting to a tensor moves the channel axis to the first
            dimension.
        Resize: Target size tuple, e.g. ``(500, 500)``.
        Contrast: Probability (0-1) of applying auto-contrast.
        Equalize: Probability (0-1) of equalizing the image.
        HFlip: Probability (0-1) of a horizontal flip.
        Invert: Probability (0-1) of inverting the image colors.
        VFlip: Probability (0-1) of a vertical flip.
        Rotation: Range of rotation degrees (0-360), e.g. ``90`` or
            ``[-90, 90]``.
        Grayscale: Probability (0-1) of converting to grayscale.
        Perspective: Probability (0-1) of a random perspective distortion.
        Erasing: Probability (0-1) of random erasing.
        Crop: Crop size tuple, e.g. ``(500, 500)``.

    Returns:
        ``transforms.Compose`` wrapping the selected transforms (empty when
        no option is enabled).
    """
    pipeline = []

    if ToTensor:
        pipeline.append(transforms.ToTensor())
    if Resize is not None:
        pipeline.append(transforms.Resize(Resize))
    if Contrast is not None:
        pipeline.append(transforms.RandomAutocontrast(p=Contrast))
    if Equalize is not None:
        pipeline.append(transforms.RandomEqualize(p=Equalize))
    if HFlip is not None:
        pipeline.append(transforms.RandomHorizontalFlip(p=HFlip))
    if Invert is not None:
        pipeline.append(transforms.RandomInvert(p=Invert))
    if VFlip is not None:
        pipeline.append(transforms.RandomVerticalFlip(p=VFlip))
    if Rotation is not None:
        # The deprecated ``resample`` keyword is intentionally not passed:
        # ``None`` was its default, and later torchvision releases removed
        # the parameter, so passing it raises TypeError there.
        pipeline.append(
            transforms.RandomRotation(Rotation,
                                      expand=False,
                                      center=None,
                                      fill=0))
    if Grayscale is not None:
        pipeline.append(transforms.RandomGrayscale(p=Grayscale))
    if Perspective is not None:
        pipeline.append(
            transforms.RandomPerspective(distortion_scale=0.5,
                                         p=Perspective,
                                         fill=0))
    if Erasing is not None:
        pipeline.append(
            transforms.RandomErasing(p=Erasing,
                                     scale=(0.02, 0.33),
                                     ratio=(0.3, 3.3),
                                     value=0,
                                     inplace=False))
    if Crop is not None:
        pipeline.append(
            transforms.RandomCrop(Crop,
                                  padding=None,
                                  pad_if_needed=False,
                                  fill=0,
                                  padding_mode='constant'))
    return transforms.Compose(pipeline)