Example #1
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus
    if cfg.checkpoint_config is not None:
        # save mmdet version in checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__, config=cfg.text)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    train_dataset = get_dataset(cfg.data.train)
    train_detector(
        model,
        train_dataset,
        cfg,
        distributed=distributed,
        validate=args.validate,
        logger=logger)
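
main() depends on a parse_args() helper that is not shown; a minimal argparse sketch covering only the attributes main() actually reads (the flag names are inferred from usage, so treat them as assumptions):

import argparse

def parse_args():
    # Hypothetical reconstruction: every flag below is inferred from how
    # main() uses the parsed args.
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work_dir', help='dir to save logs and models')
    parser.add_argument('--resume_from', help='checkpoint file to resume from')
    parser.add_argument('--gpus', type=int, default=1, help='number of GPUs to use')
    parser.add_argument('--launcher', default='none',
                        choices=['none', 'pytorch', 'slurm', 'mpi'],
                        help='job launcher')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument('--validate', action='store_true',
                        help='evaluate checkpoints during training')
    return parser.parse_args()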
Example #2
def build_model(cfg, gpu):
    """
    Return model wrapped in MMDataParallel.
    TODO: support multi-GPU
    """
    if not isinstance(gpu, list):
        gpu = [gpu]

    model_type = cfg["model"].pop("config")
    train_cfg = cfg["model"].pop("train_cfg")
    test_cfg = cfg["model"].pop("test_cfg")

    mmdet_cfg = Config.fromfile(osp.join(_PATH, "models", "{}.py".format(model_type)))

    # Change model parameters
    if cfg["model"] is not None:
        if type(cfg["model"]["bbox_head"]) == list:
            assert len(cfg["model"]["bbox_head"]) == len(mmdet_cfg.model.bbox_head)
            bbox_head_cfg = cfg["model"].pop("bbox_head")
            for ind, i in enumerate(bbox_head_cfg):
                if i is None:
                    continue
                assert type(i) is dict
                assert type(mmdet_cfg.model.bbox_head[ind]) is ConfigDict
                mmdet_cfg.model.bbox_head[ind].update(i)
        mmdet_cfg.model.update(cfg["model"])
    if train_cfg is not None:
        if type(train_cfg["rcnn"]) == list:
            assert len(train_cfg["rcnn"]) == len(mmdet_cfg.train_cfg.rcnn)
            rcnn_cfg = train_cfg.pop("rcnn")
            for ind, i in enumerate(rcnn_cfg):
                if i is None:
                    continue
                assert type(i) is dict
                assert type(mmdet_cfg.train_cfg.rcnn[ind]) is ConfigDict
                mmdet_cfg.train_cfg.rcnn[ind].update(i)
        mmdet_cfg.train_cfg.update(train_cfg)
    if test_cfg is not None:
        mmdet_cfg.test_cfg.update(test_cfg)
    model = build_detector(
        mmdet_cfg.model, train_cfg=mmdet_cfg.train_cfg, test_cfg=mmdet_cfg.test_cfg
    )
    model = MMDataParallel(model, device_ids=gpu)
    return model
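
A usage sketch for build_model, assuming the factory layout it implies: cfg['model']['config'] names a file under factory/models/, and bbox_head overrides are matched per cascade stage (the model name and three-head count below are placeholders):

# Hypothetical cfg; 'config' must match a file factory/models/<name>.py.
cfg = {
    'model': {
        'config': 'cascade_rcnn_r50',   # placeholder model name
        'train_cfg': None,              # keep the mmdet defaults
        'test_cfg': None,               # keep the mmdet defaults
        # One entry per cascade stage; None leaves that stage untouched.
        'bbox_head': [None, None, {'num_classes': 2}],
    }
}
model = build_model(cfg, gpu=0)  # detector wrapped in MMDataParallel on GPU 0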
Example #4
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    if args.ckpt:
        cfg.resume_from = args.ckpt

    cfg.test_cfg.rcnn.score_thr = 0.5

    FOCAL_LENGTH = cfg.get('FOCAL_LENGTH', 1000)

    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.text,
                                          CLASSES=('Human', ))
    # add an attribute for visualization convenience
    model.CLASSES = ('Human', )

    model = MMDataParallel(model, device_ids=[0]).cuda()

    # build runner (used here only to restore the trained weights)
    optimizer = build_optimizer(model, cfg.optimizer)

    # the lambda stands in for a batch_processor; it is never actually called
    runner = Runner(model, lambda x: x, optimizer, cfg.work_dir, cfg.log_level)
    runner.resume(cfg.resume_from)
    model = runner.model
    model.eval()
    # necessary for headless rendering
    os.environ['PYOPENGL_PLATFORM'] = 'egl'
    render = Renderer(focal_length=FOCAL_LENGTH)
    img_transform = ImageTransform(size_divisor=32, **img_norm_cfg)
    img_scale = cfg.common_val_cfg.img_scale

    with torch.no_grad():
        folder_name = args.image_folder
        output_folder = args.output_folder
        os.makedirs(output_folder, exist_ok=True)
        images = os.listdir(folder_name)
        for image in images:
            file_name = osp.join(folder_name, image)
            img = cv2.imread(file_name)
            ori_shape = img.shape

            img, img_shape, pad_shape, scale_factor = img_transform(
                img, img_scale)

            # Force fixed-size padding (works around a multi-GPU training issue)
            padded_img = np.zeros((img.shape[0], img_scale[1], img_scale[0]),
                                  dtype=img.dtype)
            padded_img[:, :img.shape[-2], :img.shape[-1]] = img
            img = padded_img

            assert img.shape[1] == 512 and img.shape[2] == 832, \
                "Image shape incorrect"

            data_batch = dict(
                img=DC([to_tensor(img[None, ...])], stack=True),
                img_meta=DC([{
                    'img_shape': img_shape,
                    'scale_factor': scale_factor,
                    'flip': False,
                    'ori_shape': ori_shape
                }],
                            cpu_only=True),
            )
            bbox_results, pred_results = model(**data_batch, return_loss=False)

            if pred_results is not None:
                pred_results['bboxes'] = bbox_results[0]
                img = denormalize(img)
                img_viz = prepare_dump(pred_results, img, render, bbox_results,
                                       FOCAL_LENGTH)
                cv2.imwrite(
                    f'{file_name.replace(folder_name, output_folder)}.output.jpg',
                    img_viz[:, :, ::-1])
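
denormalize and img_norm_cfg are used above but never defined in this snippet; a minimal sketch, assuming img_norm_cfg holds the usual mmdet mean/std values and the input is a normalized CHW array (both assumptions):

import numpy as np

# Assumed constants; the repo's real img_norm_cfg may differ.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

def denormalize(img):
    # Undo (img - mean) / std on a CHW float image; return HWC uint8 for OpenCV.
    mean = np.asarray(img_norm_cfg['mean'], np.float32).reshape(3, 1, 1)
    std = np.asarray(img_norm_cfg['std'], np.float32).reshape(3, 1, 1)
    img = img * std + mean
    return np.clip(img, 0, 255).astype(np.uint8).transpose(1, 2, 0)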
Example #5
def test_dic_model():
    pretrained = 'https://download.openmmlab.com/mmediting/' + \
        'restorers/dic/light_cnn_feature.pth'

    model_cfg_pre = dict(type='DIC',
                         generator=dict(type='DICNet',
                                        in_channels=3,
                                        out_channels=3,
                                        mid_channels=48),
                         pixel_loss=dict(type='L1Loss',
                                         loss_weight=1.0,
                                         reduction='mean'),
                         align_loss=dict(type='MSELoss',
                                         loss_weight=0.1,
                                         reduction='mean'))

    model_cfg = dict(type='DIC',
                     generator=dict(type='DICNet',
                                    in_channels=3,
                                    out_channels=3,
                                    mid_channels=48),
                     discriminator=dict(type='LightCNN', in_channels=3),
                     pixel_loss=dict(type='L1Loss',
                                     loss_weight=1.0,
                                     reduction='mean'),
                     align_loss=dict(type='MSELoss',
                                     loss_weight=0.1,
                                     reduction='mean'),
                     feature_loss=dict(type='LightCNNFeatureLoss',
                                       pretrained=pretrained,
                                       loss_weight=0.1,
                                       criterion='l1'),
                     gan_loss=dict(type='GANLoss',
                                   gan_type='vanilla',
                                   loss_weight=0.005,
                                   real_label_val=1.0,
                                   fake_label_val=0))

    scale = 8
    train_cfg = None
    test_cfg = Config(dict(metrics=['PSNR', 'SSIM'], crop_border=scale))

    # build restorer (the pretrain config is instantiated only to check it builds)
    build_model(model_cfg_pre, train_cfg=train_cfg, test_cfg=test_cfg)
    restorer = build_model(model_cfg, train_cfg=train_cfg, test_cfg=test_cfg)

    # test attributes
    assert restorer.__class__.__name__ == 'DIC'

    # prepare data
    inputs = torch.rand(1, 3, 16, 16)
    targets = torch.rand(1, 3, 128, 128)
    heatmap = torch.rand(1, 68, 32, 32)
    data_batch = {'lq': inputs, 'gt': targets, 'heatmap': heatmap}

    # prepare optimizer
    optim_cfg = dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))
    generator = obj_from_dict(optim_cfg, torch.optim,
                              dict(params=restorer.parameters()))
    discriminator = obj_from_dict(optim_cfg, torch.optim,
                                  dict(params=restorer.parameters()))
    optimizer = dict(generator=generator, discriminator=discriminator)

    # test train_step and forward_test (cpu)
    outputs = restorer.train_step(data_batch, optimizer)
    assert isinstance(outputs, dict)
    assert isinstance(outputs['log_vars'], dict)
    assert isinstance(outputs['log_vars']['loss_pixel_v3'], float)
    assert outputs['num_samples'] == 1
    assert outputs['results']['lq'].shape == data_batch['lq'].shape
    assert outputs['results']['gt'].shape == data_batch['gt'].shape
    assert torch.is_tensor(outputs['results']['output'])
    assert outputs['results']['output'].size() == (1, 3, 128, 128)

    # test train_step and forward_test (gpu)
    if torch.cuda.is_available():
        restorer = restorer.cuda()
        data_batch = {
            'lq': inputs.cuda(),
            'gt': targets.cuda(),
            'heatmap': heatmap.cuda()
        }

        # train_step
        optim_cfg = dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))
        generator = obj_from_dict(optim_cfg, torch.optim,
                                  dict(params=restorer.parameters()))
        discriminator = obj_from_dict(optim_cfg, torch.optim,
                                      dict(params=restorer.parameters()))
        optimizer = dict(generator=generator, discriminator=discriminator)
        outputs = restorer.train_step(data_batch, optimizer)
        assert isinstance(outputs, dict)
        assert isinstance(outputs['log_vars'], dict)
        assert isinstance(outputs['log_vars']['loss_pixel_v3'], float)
        assert outputs['num_samples'] == 1
        assert outputs['results']['lq'].shape == data_batch['lq'].shape
        assert outputs['results']['gt'].shape == data_batch['gt'].shape
        assert torch.is_tensor(outputs['results']['output'])
        assert outputs['results']['output'].size() == (1, 3, 128, 128)

        # val_step
        data_batch.pop('heatmap')
        result = restorer.val_step(data_batch, meta=[{'gt_path': ''}])
        assert isinstance(result, dict)
        assert isinstance(result['eval_result'], dict)
        assert result['eval_result'].keys() == {'PSNR', 'SSIM'}
        assert isinstance(result['eval_result']['PSNR'], np.float64)
        assert isinstance(result['eval_result']['SSIM'], np.float64)

        with pytest.raises(AssertionError):
            # evaluation with metrics must have gt images
            restorer(lq=inputs.cuda(), test_mode=True)

        with pytest.raises(TypeError):
            restorer.init_weights(pretrained=1)
        with pytest.raises(OSError):
            restorer.init_weights(pretrained='')
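
The test functions here and in Examples #6 and #10 share the same unstated imports; a sketch of the header they assume, with module paths following mmediting's layout:

import numpy as np
import pytest
import torch
from mmcv import Config
from mmcv.runner import obj_from_dict

from mmedit.models import build_model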
Example #6
def test_ttsr():
    model_cfg = dict(type='TTSR',
                     generator=dict(type='TTSRNet',
                                    in_channels=3,
                                    out_channels=3,
                                    mid_channels=64,
                                    num_blocks=(16, 16, 8, 4)),
                     extractor=dict(type='LTE'),
                     transformer=dict(type='SearchTransformer'),
                     discriminator=dict(type='TTSRDiscriminator', in_size=64),
                     pixel_loss=dict(type='L1Loss',
                                     loss_weight=1.0,
                                     reduction='mean'),
                     perceptual_loss=dict(type='PerceptualLoss',
                                          layer_weights={'29': 1.0},
                                          vgg_type='vgg19',
                                          perceptual_weight=1e-2,
                                          style_weight=0.001,
                                          criterion='mse'),
                     transferal_perceptual_loss=dict(
                         type='TransferalPerceptualLoss',
                         loss_weight=1e-2,
                         use_attention=False,
                         criterion='mse'),
                     gan_loss=dict(type='GANLoss',
                                   gan_type='vanilla',
                                   loss_weight=1e-3,
                                   real_label_val=1.0,
                                   fake_label_val=0))

    scale = 4
    train_cfg = None
    test_cfg = Config(dict(metrics=['PSNR', 'SSIM'], crop_border=scale))

    # build restorer
    restorer = build_model(model_cfg, train_cfg=train_cfg, test_cfg=test_cfg)

    # test attributes
    assert restorer.__class__.__name__ == 'TTSR'

    # prepare data
    inputs = torch.rand(1, 3, 16, 16)
    targets = torch.rand(1, 3, 64, 64)
    ref = torch.rand(1, 3, 64, 64)
    data_batch = {
        'lq': inputs,
        'gt': targets,
        'ref': ref,
        'lq_up': ref,
        'ref_downup': ref
    }

    # prepare optimizer
    optim_cfg_g = dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))
    optim_cfg_d = dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))
    optimizer = dict(
        generator=obj_from_dict(optim_cfg_g, torch.optim,
                                dict(params=restorer.parameters())),
        discriminator=obj_from_dict(optim_cfg_d, torch.optim,
                                    dict(params=restorer.parameters())))

    # test train_step and forward_test (cpu)
    outputs = restorer.train_step(data_batch, optimizer)
    assert isinstance(outputs, dict)
    assert isinstance(outputs['log_vars'], dict)
    assert isinstance(outputs['log_vars']['loss_pix'], float)
    assert outputs['num_samples'] == 1
    assert outputs['results']['lq'].shape == data_batch['lq'].shape
    assert outputs['results']['gt'].shape == data_batch['gt'].shape
    assert torch.is_tensor(outputs['results']['output'])
    assert outputs['results']['output'].size() == (1, 3, 64, 64)

    # test train_step and forward_test (gpu)
    if torch.cuda.is_available():
        restorer = restorer.cuda()
        data_batch = {
            'lq': inputs.cuda(),
            'gt': targets.cuda(),
            'ref': ref.cuda(),
            'lq_up': ref.cuda(),
            'ref_downup': ref.cuda()
        }

        # train_step
        optimizer = dict(
            generator=obj_from_dict(optim_cfg_g, torch.optim,
                                    dict(params=restorer.parameters())),
            discriminator=obj_from_dict(optim_cfg_d, torch.optim,
                                        dict(params=restorer.parameters())))
        outputs = restorer.train_step(data_batch, optimizer)
        assert isinstance(outputs, dict)
        assert isinstance(outputs['log_vars'], dict)
        assert isinstance(outputs['log_vars']['loss_pix'], float)
        assert outputs['num_samples'] == 1
        assert outputs['results']['lq'].shape == data_batch['lq'].shape
        assert outputs['results']['gt'].shape == data_batch['gt'].shape
        assert torch.is_tensor(outputs['results']['output'])
        assert outputs['results']['output'].size() == (1, 3, 64, 64)

        # val_step
        result = restorer.val_step(data_batch, meta=[{'gt_path': ''}])
        assert isinstance(result, dict)
        assert isinstance(result['eval_result'], dict)
        assert result['eval_result'].keys() == {'PSNR', 'SSIM'}
        assert isinstance(result['eval_result']['PSNR'], np.float64)
        assert isinstance(result['eval_result']['SSIM'], np.float64)
Example #7
import numpy as np
import torch

from mmcv.utils.config import Config, ConfigDict
from mmcv.parallel import MMDataParallel
from mmdet.models.builder import build_detector

from factory import builder

mmdet_cfg = Config.fromfile('factory/models/SERetinaNeXt50.py')

model = build_detector(mmdet_cfg.model, mmdet_cfg.train_cfg,
                       mmdet_cfg.test_cfg)

model = MMDataParallel(model, device_ids=[0])

X = np.ones((2, 3, 512, 512))
X = torch.from_numpy(X).float()

img_meta = {0: {'img_shape': (3, 512, 512), 'scale_factor': 1., 'flip': False}}

y_pred = []
for x in X:
    y_pred.append(
        model([x.unsqueeze(0)], img_meta=[img_meta], return_loss=False))
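
A note on img_meta: detectors of this mmdet vintage only ever read img_meta[0], which is why the dict keyed by 0 above works at all; the conventional form is a list with one dict per image. Continuing the snippet above:

# Conventional per-image meta: a list of dicts rather than a dict keyed by 0.
meta = [{'img_shape': (3, 512, 512), 'scale_factor': 1., 'flip': False}]
with torch.no_grad():
    y = model([X[0:1]], img_meta=[meta], return_loss=False)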
Example #8
    def __init__(self,
                 det='PANet_IC15',
                 det_config='',
                 det_ckpt='',
                 recog='SEG',
                 recog_config='',
                 recog_ckpt='',
                 kie='',
                 kie_config='',
                 kie_ckpt='',
                 config_dir=os.path.join(str(Path.cwd()), 'configs/'),
                 device=None,
                 **kwargs):

        textdet_models = {
            'DB_r18': {
                'config':
                'dbnet/dbnet_r18_fpnc_1200e_icdar2015.py',
                'ckpt':
                'dbnet/'
                'dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth'
            },
            'DB_r50': {
                'config':
                'dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py',
                'ckpt':
                'dbnet/'
                'dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20211025-9fe3b590.pth'
            },
            'DBPP_r50': {
                'config':
                'dbnetpp/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015.py',
                'ckpt':
                'dbnet/'
                'dbnetpp_r50dcnv2_fpnc_1200e_icdar2015-20220502-d7a76fff.pth'
            },
            'DRRG': {
                'config':
                'drrg/drrg_r50_fpn_unet_1200e_ctw1500.py',
                'ckpt':
                'drrg/drrg_r50_fpn_unet_1200e_ctw1500_20211022-fb30b001.pth'
            },
            'FCE_IC15': {
                'config':
                'fcenet/fcenet_r50_fpn_1500e_icdar2015.py',
                'ckpt':
                'fcenet/fcenet_r50_fpn_1500e_icdar2015_20211022-daefb6ed.pth'
            },
            'FCE_CTW_DCNv2': {
                'config':
                'fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py',
                'ckpt':
                'fcenet/' +
                'fcenet_r50dcnv2_fpn_1500e_ctw1500_20211022-e326d7ec.pth'
            },
            'MaskRCNN_CTW': {
                'config':
                'maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py',
                'ckpt':
                'maskrcnn/'
                'mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth'
            },
            'MaskRCNN_IC15': {
                'config':
                'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py',
                'ckpt':
                'maskrcnn/'
                'mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth'
            },
            'MaskRCNN_IC17': {
                'config':
                'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py',
                'ckpt':
                'maskrcnn/'
                'mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.pth'
            },
            'PANet_CTW': {
                'config':
                'panet/panet_r18_fpem_ffm_600e_ctw1500.py',
                'ckpt':
                'panet/'
                'panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.pth'
            },
            'PANet_IC15': {
                'config':
                'panet/panet_r18_fpem_ffm_600e_icdar2015.py',
                'ckpt':
                'panet/'
                'panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth'
            },
            'PS_CTW': {
                'config': 'psenet/psenet_r50_fpnf_600e_ctw1500.py',
                'ckpt':
                'psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth'
            },
            'PS_IC15': {
                'config':
                'psenet/psenet_r50_fpnf_600e_icdar2015.py',
                'ckpt':
                'psenet/psenet_r50_fpnf_600e_icdar2015_pretrain-eefd8fe6.pth'
            },
            'TextSnake': {
                'config':
                'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py',
                'ckpt':
                'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500-27f65b64.pth'
            },
            'Tesseract': {}
        }

        textrecog_models = {
            'CRNN': {
                'config': 'crnn/crnn_academic_dataset.py',
                'ckpt': 'crnn/crnn_academic-a723a1c5.pth'
            },
            'SAR': {
                'config': 'sar/sar_r31_parallel_decoder_academic.py',
                'ckpt': 'sar/sar_r31_parallel_decoder_academic-dba3a4a3.pth'
            },
            'SAR_CN': {
                'config':
                'sar/sar_r31_parallel_decoder_chinese.py',
                'ckpt':
                'sar/sar_r31_parallel_decoder_chineseocr_20210507-b4be8214.pth'
            },
            'NRTR_1/16-1/8': {
                'config': 'nrtr/nrtr_r31_1by16_1by8_academic.py',
                'ckpt':
                'nrtr/nrtr_r31_1by16_1by8_academic_20211124-f60cebf4.pth'
            },
            'NRTR_1/8-1/4': {
                'config': 'nrtr/nrtr_r31_1by8_1by4_academic.py',
                'ckpt':
                'nrtr/nrtr_r31_1by8_1by4_academic_20211123-e1fdb322.pth'
            },
            'RobustScanner': {
                'config': 'robust_scanner/robustscanner_r31_academic.py',
                'ckpt': 'robustscanner/robustscanner_r31_academic-5f05874f.pth'
            },
            'SATRN': {
                'config': 'satrn/satrn_academic.py',
                'ckpt': 'satrn/satrn_academic_20211009-cb8b1580.pth'
            },
            'SATRN_sm': {
                'config': 'satrn/satrn_small.py',
                'ckpt': 'satrn/satrn_small_20211009-2cf13355.pth'
            },
            'ABINet': {
                'config': 'abinet/abinet_academic.py',
                'ckpt': 'abinet/abinet_academic-f718abf6.pth'
            },
            'SEG': {
                'config': 'seg/seg_r31_1by16_fpnocr_academic.py',
                'ckpt': 'seg/seg_r31_1by16_fpnocr_academic-72235b11.pth'
            },
            'CRNN_TPS': {
                'config': 'tps/crnn_tps_academic_dataset.py',
                'ckpt': 'tps/crnn_tps_academic_dataset_20210510-d221a905.pth'
            },
            'Tesseract': {},
            'MASTER': {
                'config': 'master/master_academic.py',
                'ckpt': 'master/master_r31_12e_ST_MJ_SA-787edd36.pth'
            }
        }

        kie_models = {
            'SDMGR': {
                'config': 'sdmgr/sdmgr_unet16_60e_wildreceipt.py',
                'ckpt':
                'sdmgr/sdmgr_unet16_60e_wildreceipt_20210520-7489e6de.pth'
            }
        }

        self.td = det
        self.tr = recog
        self.kie = kie
        self.device = device
        if self.device is None:
            self.device = torch.device(
                'cuda' if torch.cuda.is_available() else 'cpu')

        # Check if the det/recog model choice is valid
        if self.td and self.td not in textdet_models:
            raise ValueError(self.td,
                             'is not a supported text detection algorithm')
        elif self.tr and self.tr not in textrecog_models:
            raise ValueError(self.tr,
                             'is not a supported text recognition algorithm')
        elif self.kie:
            if self.kie not in kie_models:
                raise ValueError(
                    self.kie, 'is not a supported key information extraction'
                    ' algorithm')
            elif not (self.td and self.tr):
                raise NotImplementedError(
                    self.kie, 'has to run together'
                    ' with text detection and recognition algorithms.')

        self.detect_model = None
        if self.td and self.td == 'Tesseract':
            if tesserocr is None:
                raise ImportError('Please install tesserocr first. '
                                  'Check out the installation guide at '
                                  'https://github.com/sirfz/tesserocr')
            self.detect_model = 'Tesseract_det'
        elif self.td:
            # Build detection model
            if not det_config:
                det_config = os.path.join(config_dir, 'textdet/',
                                          textdet_models[self.td]['config'])
            if not det_ckpt:
                det_ckpt = 'https://download.openmmlab.com/mmocr/textdet/' + \
                    textdet_models[self.td]['ckpt']

            self.detect_model = init_detector(det_config,
                                              det_ckpt,
                                              device=self.device)
            self.detect_model = revert_sync_batchnorm(self.detect_model)

        self.recog_model = None
        if self.tr and self.tr == 'Tesseract':
            if tesserocr is None:
                raise ImportError('Please install tesserocr first. '
                                  'Check out the installation guide at '
                                  'https://github.com/sirfz/tesserocr')
            self.recog_model = 'Tesseract_recog'
        elif self.tr:
            # Build recognition model
            if not recog_config:
                recog_config = os.path.join(
                    config_dir, 'textrecog/',
                    textrecog_models[self.tr]['config'])
            if not recog_ckpt:
                recog_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                    'textrecog/' + textrecog_models[self.tr]['ckpt']

            self.recog_model = init_detector(recog_config,
                                             recog_ckpt,
                                             device=self.device)
            self.recog_model = revert_sync_batchnorm(self.recog_model)

        self.kie_model = None
        if self.kie:
            # Build key information extraction model
            if not kie_config:
                kie_config = os.path.join(config_dir, 'kie/',
                                          kie_models[self.kie]['config'])
            if not kie_ckpt:
                kie_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                    'kie/' + kie_models[self.kie]['ckpt']

            kie_cfg = Config.fromfile(kie_config)
            self.kie_model = build_detector(kie_cfg.model,
                                            test_cfg=kie_cfg.get('test_cfg'))
            self.kie_model = revert_sync_batchnorm(self.kie_model)
            self.kie_model.cfg = kie_cfg
            load_checkpoint(self.kie_model, kie_ckpt, map_location=self.device)

        # Attribute check
        for model in list(filter(None, [self.recog_model, self.detect_model])):
            if hasattr(model, 'module'):
                model = model.module
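
This __init__ matches the signature of MMOCR's end-to-end wrapper (mmocr.utils.ocr.MMOCR); a usage sketch under that assumption, with the image path as a placeholder:

from mmocr.utils.ocr import MMOCR

# Detection + recognition with the defaults above (PANet_IC15 + SEG).
ocr = MMOCR()
results = ocr.readtext('demo/demo_text_det.jpg', print_result=True)

# Detection only: pass an empty recog so no recognizer is built.
det_only = MMOCR(det='TextSnake', recog='')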
Example #9
    def _file2dict(filename):

        print('_file2dict:', filename)
        if 'http' in filename:
            temp_filename = _gettemp_pyfile()
            assert filename.endswith('.py')
            result = os.system(f'wget {filename} -O {temp_filename}')
            assert result == 0, f"Cannot download: {filename}"
            # Update filename
            filename = temp_filename

        filename = os.path.abspath(os.path.expanduser(filename))
        check_file_exist(filename)
        if filename.endswith('.py'):
            with tempfile.TemporaryDirectory() as temp_config_dir:
                temp_config_file = tempfile.NamedTemporaryFile(
                    dir=temp_config_dir, suffix='.py')
                temp_config_name = os.path.basename(temp_config_file.name)
                shutil.copyfile(
                    filename, os.path.join(temp_config_dir, temp_config_name))
                temp_module_name = os.path.splitext(temp_config_name)[0]
                sys.path.insert(0, temp_config_dir)
                Config._validate_py_syntax(filename)
                mod = import_module(temp_module_name)
                sys.path.pop(0)
                cfg_dict = {
                    name: value
                    for name, value in mod.__dict__.items()
                    if not name.startswith('__')
                }
                # delete imported module
                del sys.modules[temp_module_name]
                # close temp file
                temp_config_file.close()
        elif filename.endswith(('.yml', '.yaml', '.json')):
            import mmcv
            cfg_dict = mmcv.load(filename)
        else:
            raise IOError('Only py/yml/yaml/json types are supported for now!')

        cfg_text = filename + '\n'
        with open(filename, 'r') as f:
            cfg_text += f.read()

        if BASE_KEY in cfg_dict:
            cfg_dir = os.path.dirname(filename)
            base_filename = cfg_dict.pop(BASE_KEY)
            base_filename = base_filename if isinstance(
                base_filename, list) else [base_filename]

            cfg_dict_list = list()
            cfg_text_list = list()
            for f in base_filename:
                base_path = os.path.join(cfg_dir, f) if 'http' not in f else f
                _cfg_dict, _cfg_text = PublicConfig._file2dict(base_path)
                cfg_dict_list.append(_cfg_dict)
                cfg_text_list.append(_cfg_text)

            base_cfg_dict = dict()
            for c in cfg_dict_list:
                if len(base_cfg_dict.keys() & c.keys()) > 0:
                    raise KeyError('Duplicate key is not allowed among bases')
                base_cfg_dict.update(c)

            base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict)
            cfg_dict = base_cfg_dict

            # merge cfg_text
            cfg_text_list.append(cfg_text)
            cfg_text = '\n'.join(cfg_text_list)

        return cfg_dict, cfg_text
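
_gettemp_pyfile() is referenced above but not shown; a minimal sketch of what it must provide, namely a writable temporary .py path for wget to overwrite:

import os
import tempfile

def _gettemp_pyfile():
    # Create a unique temporary .py file and return its path; the caller
    # downloads the remote config over it with `wget -O`.
    fd, path = tempfile.mkstemp(suffix='.py')
    os.close(fd)
    return path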
Example #10
def test_liif():

    model_cfg = dict(type='LIIF',
                     generator=dict(type='LIIFEDSR',
                                    encoder=dict(type='EDSR',
                                                 in_channels=3,
                                                 out_channels=3,
                                                 mid_channels=64,
                                                 num_blocks=16),
                                    imnet=dict(
                                        type='MLPRefiner',
                                        in_dim=64,
                                        out_dim=3,
                                        hidden_list=[256, 256, 256, 256]),
                                    local_ensemble=True,
                                    feat_unfold=True,
                                    cell_decode=True,
                                    eval_bsize=30000),
                     rgb_mean=(0.4488, 0.4371, 0.4040),
                     rgb_std=(1., 1., 1.),
                     pixel_loss=dict(type='L1Loss',
                                     loss_weight=1.0,
                                     reduction='mean'))

    scale_max = 4
    train_cfg = None
    test_cfg = Config(dict(metrics=['PSNR', 'SSIM'], crop_border=scale_max))

    # build restorer
    restorer = build_model(model_cfg, train_cfg=train_cfg, test_cfg=test_cfg)

    # test attributes
    assert restorer.__class__.__name__ == 'LIIF'

    # prepare data
    inputs = torch.rand(1, 3, 22, 11)
    targets = torch.rand(1, 128 * 64, 3)
    coord = torch.rand(1, 128 * 64, 2)
    cell = torch.rand(1, 128 * 64, 2)
    data_batch = {'lq': inputs, 'gt': targets, 'coord': coord, 'cell': cell}

    # prepare optimizer
    optim_cfg = dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))
    optimizer = obj_from_dict(optim_cfg, torch.optim,
                              dict(params=restorer.parameters()))

    # test train_step and forward_test (cpu)
    outputs = restorer.train_step(data_batch, optimizer)
    assert isinstance(outputs, dict)
    assert isinstance(outputs['log_vars'], dict)
    assert isinstance(outputs['log_vars']['loss_pix'], float)
    assert outputs['num_samples'] == 1
    assert outputs['results']['lq'].shape == data_batch['lq'].shape
    assert outputs['results']['gt'].shape == data_batch['gt'].shape
    assert torch.is_tensor(outputs['results']['output'])
    assert outputs['results']['output'].size() == (1, 128 * 64, 3)

    # test train_step and forward_test (gpu)
    if torch.cuda.is_available():
        restorer = restorer.cuda()
        data_batch = {
            'lq': inputs.cuda(),
            'gt': targets.cuda(),
            'coord': coord.cuda(),
            'cell': cell.cuda()
        }

        # train_step
        optimizer = obj_from_dict(optim_cfg, torch.optim,
                                  dict(params=restorer.parameters()))
        outputs = restorer.train_step(data_batch, optimizer)
        assert isinstance(outputs, dict)
        assert isinstance(outputs['log_vars'], dict)
        assert isinstance(outputs['log_vars']['loss_pix'], float)
        assert outputs['num_samples'] == 1
        assert outputs['results']['lq'].shape == data_batch['lq'].shape
        assert outputs['results']['gt'].shape == data_batch['gt'].shape
        assert torch.is_tensor(outputs['results']['output'])
        assert outputs['results']['output'].size() == (1, 128 * 64, 3)

        # val_step
        result = restorer.val_step(data_batch, meta=[{'gt_path': ''}])
        assert isinstance(result, dict)
        assert isinstance(result['eval_result'], dict)
        assert result['eval_result'].keys() == {'PSNR', 'SSIM'}
        assert isinstance(result['eval_result']['PSNR'], np.float64)
        assert isinstance(result['eval_result']['SSIM'], np.float64)
Example #11
    def __init__(self,
                 det='PANet_IC15',
                 det_config='',
                 det_ckpt='',
                 recog='SEG',
                 recog_config='',
                 recog_ckpt='',
                 kie='',
                 kie_config='',
                 kie_ckpt='',
                 config_dir=os.path.join(str(Path.cwd()), 'configs/'),
                 device='cuda:0',
                 **kwargs):

        textdet_models = {
            'DB_r18': {
                'config':
                'dbnet/dbnet_r18_fpnc_1200e_icdar2015.py',
                'ckpt':
                'dbnet/'
                'dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth'
            },
            'DB_r50': {
                'config':
                'dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py',
                'ckpt':
                'dbnet/'
                'dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20210325-91cef9af.pth'
            },
            'DRRG': {
                'config': 'drrg/drrg_r50_fpn_unet_1200e_ctw1500.py',
                'ckpt': 'drrg/drrg_r50_fpn_unet_1200e_ctw1500-1abf4f67.pth'
            },
            'FCE_IC15': {
                'config': 'fcenet/fcenet_r50_fpn_1500e_icdar2015.py',
                'ckpt': 'fcenet/fcenet_r50_fpn_1500e_icdar2015-d435c061.pth'
            },
            'FCE_CTW_DCNv2': {
                'config': 'fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py',
                'ckpt': 'fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500-05d740bb.pth'
            },
            'MaskRCNN_CTW': {
                'config':
                'maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py',
                'ckpt':
                'maskrcnn/'
                'mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth'
            },
            'MaskRCNN_IC15': {
                'config':
                'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py',
                'ckpt':
                'maskrcnn/'
                'mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth'
            },
            'MaskRCNN_IC17': {
                'config':
                'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py',
                'ckpt':
                'maskrcnn/'
                'mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.pth'
            },
            'PANet_CTW': {
                'config':
                'panet/panet_r18_fpem_ffm_600e_ctw1500.py',
                'ckpt':
                'panet/'
                'panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.pth'
            },
            'PANet_IC15': {
                'config':
                'panet/panet_r18_fpem_ffm_600e_icdar2015.py',
                'ckpt':
                'panet/'
                'panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth'
            },
            'PS_CTW': {
                'config': 'psenet/psenet_r50_fpnf_600e_ctw1500.py',
                'ckpt':
                'psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth'
            },
            'PS_IC15': {
                'config':
                'psenet/psenet_r50_fpnf_600e_icdar2015.py',
                'ckpt':
                'psenet/psenet_r50_fpnf_600e_icdar2015_pretrain-eefd8fe6.pth'
            },
            'TextSnake': {
                'config':
                'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py',
                'ckpt':
                'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500-27f65b64.pth'
            }
        }

        textrecog_models = {
            'CRNN': {
                'config': 'crnn/crnn_academic_dataset.py',
                'ckpt': 'crnn/crnn_academic-a723a1c5.pth'
            },
            'SAR': {
                'config': 'sar/sar_r31_parallel_decoder_academic.py',
                'ckpt': 'sar/sar_r31_parallel_decoder_academic-dba3a4a3.pth'
            },
            'NRTR_1/16-1/8': {
                'config': 'nrtr/nrtr_r31_1by16_1by8_academic.py',
                'ckpt': 'nrtr/nrtr_r31_academic_20210406-954db95e.pth'
            },
            'NRTR_1/8-1/4': {
                'config': 'nrtr/nrtr_r31_1by8_1by4_academic.py',
                'ckpt':
                'nrtr/nrtr_r31_1by8_1by4_academic_20210406-ce16e7cc.pth'
            },
            'RobustScanner': {
                'config': 'robust_scanner/robustscanner_r31_academic.py',
                'ckpt': 'robustscanner/robustscanner_r31_academic-5f05874f.pth'
            },
            'SEG': {
                'config': 'seg/seg_r31_1by16_fpnocr_academic.py',
                'ckpt': 'seg/seg_r31_1by16_fpnocr_academic-72235b11.pth'
            },
            'CRNN_TPS': {
                'config': 'tps/crnn_tps_academic_dataset.py',
                'ckpt': 'tps/crnn_tps_academic_dataset_20210510-d221a905.pth'
            }
        }

        kie_models = {
            'SDMGR': {
                'config': 'sdmgr/sdmgr_unet16_60e_wildreceipt.py',
                'ckpt':
                'sdmgr/sdmgr_unet16_60e_wildreceipt_20210520-7489e6de.pth'
            }
        }

        self.td = det
        self.tr = recog
        self.kie = kie
        self.device = device

        # Check if the det/recog model choice is valid
        if self.td and self.td not in textdet_models:
            raise ValueError(self.td,
                             'is not a supported text detection algorithm')
        elif self.tr and self.tr not in textrecog_models:
            raise ValueError(self.tr,
                             'is not a supported text recognition algorithm')
        elif self.kie and self.kie not in kie_models:
            raise ValueError(
                self.kie, 'is not a supported key information extraction'
                ' algorithm')

        self.detect_model = None
        if self.td:
            # Build detection model
            if not det_config:
                det_config = os.path.join(config_dir, 'textdet/',
                                          textdet_models[self.td]['config'])
            if not det_ckpt:
                det_ckpt = 'https://download.openmmlab.com/mmocr/textdet/' + \
                    textdet_models[self.td]['ckpt']

            self.detect_model = init_detector(det_config,
                                              det_ckpt,
                                              device=self.device)

        self.recog_model = None
        if self.tr:
            # Build recognition model
            if not recog_config:
                recog_config = os.path.join(
                    config_dir, 'textrecog/',
                    textrecog_models[self.tr]['config'])
            if not recog_ckpt:
                recog_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                    'textrecog/' + textrecog_models[self.tr]['ckpt']

            self.recog_model = init_detector(recog_config,
                                             recog_ckpt,
                                             device=self.device)

        self.kie_model = None
        if self.kie:
            # Build key information extraction model
            if not kie_config:
                kie_config = os.path.join(config_dir, 'kie/',
                                          kie_models[self.kie]['config'])
            if not kie_ckpt:
                kie_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                    'kie/' + kie_models[self.kie]['ckpt']

            kie_cfg = Config.fromfile(kie_config)
            self.kie_model = build_detector(kie_cfg.model,
                                            test_cfg=kie_cfg.get('test_cfg'))
            self.kie_model.cfg = kie_cfg
            load_checkpoint(self.kie_model, kie_ckpt, map_location=self.device)

        # Attribute check
        for model in list(filter(None, [self.recog_model, self.detect_model])):
            if hasattr(model, 'module'):
                model = model.module
            if model.cfg.data.test['type'] == 'ConcatDataset':
                model.cfg.data.test.pipeline = \
                    model.cfg.data.test['datasets'][0].pipeline
Example #12
if __name__ == "__main__":
    # Test backend
    backend = GCSBackend("determined-ai-coco-dataset")
    img_bytes = backend.get("annotations2017/instances_val2017.json")
    import os

    with open("/tmp/instances_val2017.json", "wb") as f:
        f.write(img_bytes)
    print("done")

    # Test dataloader
    from mmcv import Config
    from mmdet.models import build_detector
    from mmcv.runner import load_checkpoint

    cfg = Config.fromfile("configs/retinanet/retinanet_r50_fpn_1x_coco.py")
    sub_backend("gcs", cfg)
    cfg.data.val.ann_file = "/tmp/instances_val2017.json"
    cfg.data.val.test_mode = True

    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    checkpoint = load_checkpoint(
        model, "./retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth"
    )
    model.cuda()
    model.eval()

    dataset, data_loader = build_dataloader(cfg.data.val, 1, 1, 8, False)

    # from mmdet.core import encode_mask_results
    # results = []
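
The trailing comments suggest the snippet was cut off heading into the usual single-GPU mmdet test loop; a sketch of that continuation, assuming the model is wrapped in MMDataParallel first, as in the other examples here:

import torch
from mmcv.parallel import MMDataParallel

model = MMDataParallel(model, device_ids=[0])
results = []
for data in data_loader:
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)
    results.extend(result)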