def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    if cfg.checkpoint_config is not None:
        # save mmdet version in checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__, config=cfg.text)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    train_dataset = get_dataset(cfg.data.train)
    train_detector(
        model,
        train_dataset,
        cfg,
        distributed=distributed,
        validate=args.validate,
        logger=logger)
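# Invocation sketch, assuming this file is the training entry point and that
# parse_args() exposes the flags referenced above (--work_dir, --resume_from,
# --gpus, --validate, --seed, --launcher); the config path is illustrative.
#
#   python train.py configs/faster_rcnn_r50_fpn_1x.py \
#       --work_dir work_dirs/frcnn --gpus 1 --validate
if __name__ == '__main__':
    main()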
def build_model(cfg, gpu):
    """Return model wrapped in MMDataParallel.

    TODO: support multi-GPU
    """
    if not isinstance(gpu, list):
        gpu = [gpu]

    model_type = cfg["model"].pop("config")
    train_cfg = cfg["model"].pop("train_cfg")
    test_cfg = cfg["model"].pop("test_cfg")
    mmdet_cfg = Config.fromfile(
        osp.join(_PATH, "models", "{}.py".format(model_type)))

    # Change model parameters
    if cfg["model"] is not None:
        # use .get() so configs without a bbox_head override do not KeyError
        if isinstance(cfg["model"].get("bbox_head"), list):
            assert len(cfg["model"]["bbox_head"]) == len(mmdet_cfg.model.bbox_head)
            bbox_head_cfg = cfg["model"].pop("bbox_head")
            for ind, i in enumerate(bbox_head_cfg):
                if i is None:
                    continue
                assert isinstance(i, dict)
                assert isinstance(mmdet_cfg.model.bbox_head[ind], ConfigDict)
                mmdet_cfg.model.bbox_head[ind].update(i)
        mmdet_cfg.model.update(cfg["model"])
    if train_cfg is not None:
        if isinstance(train_cfg["rcnn"], list):
            assert len(train_cfg["rcnn"]) == len(mmdet_cfg.train_cfg.rcnn)
            rcnn_cfg = train_cfg.pop("rcnn")
            for ind, i in enumerate(rcnn_cfg):
                if i is None:
                    continue
                assert isinstance(i, dict)
                assert isinstance(mmdet_cfg.train_cfg.rcnn[ind], ConfigDict)
                mmdet_cfg.train_cfg.rcnn[ind].update(i)
        mmdet_cfg.train_cfg.update(train_cfg)
    if test_cfg is not None:
        mmdet_cfg.test_cfg.update(test_cfg)

    model = build_detector(
        mmdet_cfg.model,
        train_cfg=mmdet_cfg.train_cfg,
        test_cfg=mmdet_cfg.test_cfg)
    model = MMDataParallel(model, device_ids=gpu)
    return model
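# Minimal usage sketch for build_model(). The model name "retinanet" and the
# override values are illustrative assumptions; any config file under
# <_PATH>/models/ with a matching name would work the same way.
example_cfg = {
    "model": {
        "config": "retinanet",            # resolves to <_PATH>/models/retinanet.py
        "train_cfg": None,                # keep the defaults from the model file
        "test_cfg": {"score_thr": 0.3},   # example post-processing override
    }
}
# model = build_model(example_cfg, gpu=0)  # single GPU, wrapped in MMDataParallel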
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.ckpt:
        cfg.resume_from = args.ckpt
    cfg.test_cfg.rcnn.score_thr = 0.5
    FOCAL_LENGTH = cfg.get('FOCAL_LENGTH', 1000)

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__, config=cfg.text, CLASSES=('Human', ))
    # add an attribute for visualization convenience
    model.CLASSES = ('Human', )
    model = MMDataParallel(model, device_ids=[0]).cuda()

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, lambda x: x, optimizer, cfg.work_dir, cfg.log_level)
    runner.resume(cfg.resume_from)
    model = runner.model
    model.eval()

    # necessary for headless rendering
    os.environ['PYOPENGL_PLATFORM'] = 'egl'
    render = Renderer(focal_length=FOCAL_LENGTH)
    img_transform = ImageTransform(size_divisor=32, **img_norm_cfg)
    img_scale = cfg.common_val_cfg.img_scale

    with torch.no_grad():
        folder_name = args.image_folder
        output_folder = args.output_folder
        os.makedirs(output_folder, exist_ok=True)
        images = os.listdir(folder_name)
        for image in images:
            file_name = osp.join(folder_name, image)
            img = cv2.imread(file_name)
            ori_shape = img.shape
            img, img_shape, pad_shape, scale_factor = img_transform(
                img, img_scale)

            # Force padding for the issue of multi-GPU training
            padded_img = np.zeros(
                (img.shape[0], img_scale[1], img_scale[0]), dtype=img.dtype)
            padded_img[:, :img.shape[-2], :img.shape[-1]] = img
            img = padded_img
            assert img.shape[1] == 512 and img.shape[2] == 832, \
                'Image shape incorrect'

            data_batch = dict(
                img=DC([to_tensor(img[None, ...])], stack=True),
                img_meta=DC(
                    [{
                        'img_shape': img_shape,
                        'scale_factor': scale_factor,
                        'flip': False,
                        'ori_shape': ori_shape
                    }],
                    cpu_only=True),
            )
            bbox_results, pred_results = model(**data_batch, return_loss=False)

            if pred_results is not None:
                pred_results['bboxes'] = bbox_results[0]
                img = denormalize(img)
                img_viz = prepare_dump(pred_results, img, render,
                                       bbox_results, FOCAL_LENGTH)
                cv2.imwrite(
                    f'{file_name.replace(folder_name, output_folder)}.output.jpg',
                    img_viz[:, :, ::-1])
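# Invocation sketch. The flag names are inferred from the attributes accessed
# above (args.ckpt, args.image_folder, args.output_folder) and are therefore
# assumptions about parse_args(), not verified against it:
#
#   python demo.py CONFIG_FILE --ckpt work_dirs/latest.pth \
#       --image_folder demo/images --output_folder demo/output
if __name__ == '__main__':
    main()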
def test_dic_model():
    pretrained = 'https://download.openmmlab.com/mmediting/' + \
                 'restorers/dic/light_cnn_feature.pth'

    model_cfg_pre = dict(
        type='DIC',
        generator=dict(
            type='DICNet', in_channels=3, out_channels=3, mid_channels=48),
        pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'),
        align_loss=dict(type='MSELoss', loss_weight=0.1, reduction='mean'))

    model_cfg = dict(
        type='DIC',
        generator=dict(
            type='DICNet', in_channels=3, out_channels=3, mid_channels=48),
        discriminator=dict(type='LightCNN', in_channels=3),
        pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'),
        align_loss=dict(type='MSELoss', loss_weight=0.1, reduction='mean'),
        feature_loss=dict(
            type='LightCNNFeatureLoss',
            pretrained=pretrained,
            loss_weight=0.1,
            criterion='l1'),
        gan_loss=dict(
            type='GANLoss',
            gan_type='vanilla',
            loss_weight=0.005,
            real_label_val=1.0,
            fake_label_val=0))

    scale = 8
    train_cfg = None
    test_cfg = Config(dict(metrics=['PSNR', 'SSIM'], crop_border=scale))

    # build restorer
    build_model(model_cfg_pre, train_cfg=train_cfg, test_cfg=test_cfg)
    restorer = build_model(model_cfg, train_cfg=train_cfg, test_cfg=test_cfg)

    # test attributes
    assert restorer.__class__.__name__ == 'DIC'

    # prepare data
    inputs = torch.rand(1, 3, 16, 16)
    targets = torch.rand(1, 3, 128, 128)
    heatmap = torch.rand(1, 68, 32, 32)
    data_batch = {'lq': inputs, 'gt': targets, 'heatmap': heatmap}

    # prepare optimizer
    optim_cfg = dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))
    generator = obj_from_dict(optim_cfg, torch.optim,
                              dict(params=restorer.parameters()))
    discriminator = obj_from_dict(optim_cfg, torch.optim,
                                  dict(params=restorer.parameters()))
    optimizer = dict(generator=generator, discriminator=discriminator)

    # test train_step and forward_test (cpu)
    outputs = restorer.train_step(data_batch, optimizer)
    assert isinstance(outputs, dict)
    assert isinstance(outputs['log_vars'], dict)
    assert isinstance(outputs['log_vars']['loss_pixel_v3'], float)
    assert outputs['num_samples'] == 1
    assert outputs['results']['lq'].shape == data_batch['lq'].shape
    assert outputs['results']['gt'].shape == data_batch['gt'].shape
    assert torch.is_tensor(outputs['results']['output'])
    assert outputs['results']['output'].size() == (1, 3, 128, 128)

    # test train_step and forward_test (gpu)
    if torch.cuda.is_available():
        restorer = restorer.cuda()
        data_batch = {
            'lq': inputs.cuda(),
            'gt': targets.cuda(),
            'heatmap': heatmap.cuda()
        }

        # train_step
        optim_cfg = dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))
        generator = obj_from_dict(optim_cfg, torch.optim,
                                  dict(params=restorer.parameters()))
        discriminator = obj_from_dict(optim_cfg, torch.optim,
                                      dict(params=restorer.parameters()))
        optimizer = dict(generator=generator, discriminator=discriminator)
        outputs = restorer.train_step(data_batch, optimizer)
        assert isinstance(outputs, dict)
        assert isinstance(outputs['log_vars'], dict)
        assert isinstance(outputs['log_vars']['loss_pixel_v3'], float)
        assert outputs['num_samples'] == 1
        assert outputs['results']['lq'].shape == data_batch['lq'].shape
        assert outputs['results']['gt'].shape == data_batch['gt'].shape
        assert torch.is_tensor(outputs['results']['output'])
        assert outputs['results']['output'].size() == (1, 3, 128, 128)

        # val_step
        data_batch.pop('heatmap')
        result = restorer.val_step(data_batch, meta=[{'gt_path': ''}])
        assert isinstance(result, dict)
        assert isinstance(result['eval_result'], dict)
        assert result['eval_result'].keys() == set({'PSNR', 'SSIM'})
        assert isinstance(result['eval_result']['PSNR'], np.float64)
        assert isinstance(result['eval_result']['SSIM'], np.float64)

        with pytest.raises(AssertionError):
            # evaluation with metrics must have gt images
            restorer(lq=inputs.cuda(), test_mode=True)

        with pytest.raises(TypeError):
            restorer.init_weights(pretrained=1)
        with pytest.raises(OSError):
            restorer.init_weights(pretrained='')
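# To run this test in isolation (assuming it lives in an mmediting checkout,
# e.g. under tests/test_models/test_restorers/):
#
#   pytest tests/test_models/test_restorers/test_dic_model.py::test_dic_model
#
# The GPU branch is skipped automatically when torch.cuda.is_available() is
# False, so the test also passes on CPU-only machines.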
def test_ttsr():
    model_cfg = dict(
        type='TTSR',
        generator=dict(
            type='TTSRNet',
            in_channels=3,
            out_channels=3,
            mid_channels=64,
            num_blocks=(16, 16, 8, 4)),
        extractor=dict(type='LTE'),
        transformer=dict(type='SearchTransformer'),
        discriminator=dict(type='TTSRDiscriminator', in_size=64),
        pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'),
        perceptual_loss=dict(
            type='PerceptualLoss',
            layer_weights={'29': 1.0},
            vgg_type='vgg19',
            perceptual_weight=1e-2,
            style_weight=0.001,
            criterion='mse'),
        transferal_perceptual_loss=dict(
            type='TransferalPerceptualLoss',
            loss_weight=1e-2,
            use_attention=False,
            criterion='mse'),
        gan_loss=dict(
            type='GANLoss',
            gan_type='vanilla',
            loss_weight=1e-3,
            real_label_val=1.0,
            fake_label_val=0))

    scale = 4
    train_cfg = None
    test_cfg = Config(dict(metrics=['PSNR', 'SSIM'], crop_border=scale))

    # build restorer
    restorer = build_model(model_cfg, train_cfg=train_cfg, test_cfg=test_cfg)

    # test attributes
    assert restorer.__class__.__name__ == 'TTSR'

    # prepare data
    inputs = torch.rand(1, 3, 16, 16)
    targets = torch.rand(1, 3, 64, 64)
    ref = torch.rand(1, 3, 64, 64)
    data_batch = {
        'lq': inputs,
        'gt': targets,
        'ref': ref,
        'lq_up': ref,
        'ref_downup': ref
    }

    # prepare optimizer
    optim_cfg_g = dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))
    optim_cfg_d = dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))
    optimizer = dict(
        generator=obj_from_dict(optim_cfg_g, torch.optim,
                                dict(params=restorer.parameters())),
        discriminator=obj_from_dict(optim_cfg_d, torch.optim,
                                    dict(params=restorer.parameters())))

    # test train_step and forward_test (cpu)
    outputs = restorer.train_step(data_batch, optimizer)
    assert isinstance(outputs, dict)
    assert isinstance(outputs['log_vars'], dict)
    assert isinstance(outputs['log_vars']['loss_pix'], float)
    assert outputs['num_samples'] == 1
    assert outputs['results']['lq'].shape == data_batch['lq'].shape
    assert outputs['results']['gt'].shape == data_batch['gt'].shape
    assert torch.is_tensor(outputs['results']['output'])
    assert outputs['results']['output'].size() == (1, 3, 64, 64)

    # test train_step and forward_test (gpu)
    if torch.cuda.is_available():
        restorer = restorer.cuda()
        data_batch = {
            'lq': inputs.cuda(),
            'gt': targets.cuda(),
            'ref': ref.cuda(),
            'lq_up': ref.cuda(),
            'ref_downup': ref.cuda()
        }

        # train_step
        optimizer = dict(
            generator=obj_from_dict(optim_cfg_g, torch.optim,
                                    dict(params=restorer.parameters())),
            discriminator=obj_from_dict(optim_cfg_d, torch.optim,
                                        dict(params=restorer.parameters())))
        outputs = restorer.train_step(data_batch, optimizer)
        assert isinstance(outputs, dict)
        assert isinstance(outputs['log_vars'], dict)
        assert isinstance(outputs['log_vars']['loss_pix'], float)
        assert outputs['num_samples'] == 1
        assert outputs['results']['lq'].shape == data_batch['lq'].shape
        assert outputs['results']['gt'].shape == data_batch['gt'].shape
        assert torch.is_tensor(outputs['results']['output'])
        assert outputs['results']['output'].size() == (1, 3, 64, 64)

        # val_step
        result = restorer.val_step(data_batch, meta=[{'gt_path': ''}])
        assert isinstance(result, dict)
        assert isinstance(result['eval_result'], dict)
        assert result['eval_result'].keys() == set({'PSNR', 'SSIM'})
        assert isinstance(result['eval_result']['PSNR'], np.float64)
        assert isinstance(result['eval_result']['SSIM'], np.float64)
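# Note on the optimizer dict above: adversarially trained restorers in
# mmediting expect train_step() to receive one optimizer per submodule key
# ('generator'/'discriminator') rather than a single optimizer. A sketch of
# the same pattern with torch.optim directly; the per-submodule parameter
# groups are an assumption (the test reuses restorer.parameters() for both):
#
#   optimizer = dict(
#       generator=torch.optim.Adam(restorer.generator.parameters(), lr=1e-4),
#       discriminator=torch.optim.Adam(
#           restorer.discriminator.parameters(), lr=1e-4))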
import numpy as np
import torch
from mmcv.utils.config import Config, ConfigDict
from mmcv.parallel import MMDataParallel
from mmdet.models.builder import build_detector

from factory import builder

mmdet_cfg = Config.fromfile('factory/models/SERetinaNeXt50.py')
model = build_detector(mmdet_cfg.model, mmdet_cfg.train_cfg, mmdet_cfg.test_cfg)
model = MMDataParallel(model, device_ids=[0])

# dummy batch of two 512x512 RGB images
X = np.ones([2, 3, 512, 512])
X = torch.from_numpy(X).float()
# one meta dict per image; img_shape is (H, W, C) as mmdet expects
img_meta = [{'img_shape': (512, 512, 3), 'scale_factor': 1., 'flip': False}]

y_pred = []
for x in X:
    y_pred.append(
        model([x.unsqueeze(0)], img_meta=[img_meta], return_loss=False))
def __init__(self,
             det='PANet_IC15',
             det_config='',
             det_ckpt='',
             recog='SEG',
             recog_config='',
             recog_ckpt='',
             kie='',
             kie_config='',
             kie_ckpt='',
             config_dir=os.path.join(str(Path.cwd()), 'configs/'),
             device=None,
             **kwargs):

    textdet_models = {
        'DB_r18': {
            'config': 'dbnet/dbnet_r18_fpnc_1200e_icdar2015.py',
            'ckpt': 'dbnet/'
            'dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth'
        },
        'DB_r50': {
            'config': 'dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py',
            'ckpt': 'dbnet/'
            'dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20211025-9fe3b590.pth'
        },
        'DBPP_r50': {
            'config': 'dbnetpp/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015.py',
            'ckpt': 'dbnet/'
            'dbnetpp_r50dcnv2_fpnc_1200e_icdar2015-20220502-d7a76fff.pth'
        },
        'DRRG': {
            'config': 'drrg/drrg_r50_fpn_unet_1200e_ctw1500.py',
            'ckpt': 'drrg/drrg_r50_fpn_unet_1200e_ctw1500_20211022-fb30b001.pth'
        },
        'FCE_IC15': {
            'config': 'fcenet/fcenet_r50_fpn_1500e_icdar2015.py',
            'ckpt': 'fcenet/fcenet_r50_fpn_1500e_icdar2015_20211022-daefb6ed.pth'
        },
        'FCE_CTW_DCNv2': {
            'config': 'fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py',
            'ckpt': 'fcenet/'
            'fcenet_r50dcnv2_fpn_1500e_ctw1500_20211022-e326d7ec.pth'
        },
        'MaskRCNN_CTW': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth'
        },
        'MaskRCNN_IC15': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth'
        },
        'MaskRCNN_IC17': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.pth'
        },
        'PANet_CTW': {
            'config': 'panet/panet_r18_fpem_ffm_600e_ctw1500.py',
            'ckpt': 'panet/'
            'panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.pth'
        },
        'PANet_IC15': {
            'config': 'panet/panet_r18_fpem_ffm_600e_icdar2015.py',
            'ckpt': 'panet/'
            'panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth'
        },
        'PS_CTW': {
            'config': 'psenet/psenet_r50_fpnf_600e_ctw1500.py',
            'ckpt': 'psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth'
        },
        'PS_IC15': {
            'config': 'psenet/psenet_r50_fpnf_600e_icdar2015.py',
            'ckpt': 'psenet/psenet_r50_fpnf_600e_icdar2015_pretrain-eefd8fe6.pth'
        },
        'TextSnake': {
            'config': 'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py',
            'ckpt': 'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500-27f65b64.pth'
        },
        'Tesseract': {}
    }

    textrecog_models = {
        'CRNN': {
            'config': 'crnn/crnn_academic_dataset.py',
            'ckpt': 'crnn/crnn_academic-a723a1c5.pth'
        },
        'SAR': {
            'config': 'sar/sar_r31_parallel_decoder_academic.py',
            'ckpt': 'sar/sar_r31_parallel_decoder_academic-dba3a4a3.pth'
        },
        'SAR_CN': {
            'config': 'sar/sar_r31_parallel_decoder_chinese.py',
            'ckpt': 'sar/sar_r31_parallel_decoder_chineseocr_20210507-b4be8214.pth'
        },
        'NRTR_1/16-1/8': {
            'config': 'nrtr/nrtr_r31_1by16_1by8_academic.py',
            'ckpt': 'nrtr/nrtr_r31_1by16_1by8_academic_20211124-f60cebf4.pth'
        },
        'NRTR_1/8-1/4': {
            'config': 'nrtr/nrtr_r31_1by8_1by4_academic.py',
            'ckpt': 'nrtr/nrtr_r31_1by8_1by4_academic_20211123-e1fdb322.pth'
        },
        'RobustScanner': {
            'config': 'robust_scanner/robustscanner_r31_academic.py',
            'ckpt': 'robustscanner/robustscanner_r31_academic-5f05874f.pth'
        },
        'SATRN': {
            'config': 'satrn/satrn_academic.py',
            'ckpt': 'satrn/satrn_academic_20211009-cb8b1580.pth'
        },
        'SATRN_sm': {
            'config': 'satrn/satrn_small.py',
            'ckpt': 'satrn/satrn_small_20211009-2cf13355.pth'
        },
        'ABINet': {
            'config': 'abinet/abinet_academic.py',
            'ckpt': 'abinet/abinet_academic-f718abf6.pth'
        },
        'SEG': {
            'config': 'seg/seg_r31_1by16_fpnocr_academic.py',
            'ckpt': 'seg/seg_r31_1by16_fpnocr_academic-72235b11.pth'
        },
        'CRNN_TPS': {
            'config': 'tps/crnn_tps_academic_dataset.py',
            'ckpt': 'tps/crnn_tps_academic_dataset_20210510-d221a905.pth'
        },
        'Tesseract': {},
        'MASTER': {
            'config': 'master/master_academic.py',
            'ckpt': 'master/master_r31_12e_ST_MJ_SA-787edd36.pth'
        }
    }

    kie_models = {
        'SDMGR': {
            'config': 'sdmgr/sdmgr_unet16_60e_wildreceipt.py',
            'ckpt': 'sdmgr/sdmgr_unet16_60e_wildreceipt_20210520-7489e6de.pth'
        }
    }

    self.td = det
    self.tr = recog
    self.kie = kie
    self.device = device
    if self.device is None:
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

    # Check if the det/recog model choice is valid
    if self.td and self.td not in textdet_models:
        raise ValueError(self.td,
                         'is not a supported text detection algorithm')
    elif self.tr and self.tr not in textrecog_models:
        raise ValueError(self.tr,
                         'is not a supported text recognition algorithm')
    elif self.kie:
        if self.kie not in kie_models:
            raise ValueError(
                self.kie, 'is not a supported key information extraction'
                ' algorithm')
        elif not (self.td and self.tr):
            raise NotImplementedError(
                self.kie, 'has to run together'
                ' with text detection and recognition algorithms.')

    self.detect_model = None
    if self.td and self.td == 'Tesseract':
        if tesserocr is None:
            raise ImportError('Please install tesserocr first. '
                              'Check out the installation guide at '
                              'https://github.com/sirfz/tesserocr')
        self.detect_model = 'Tesseract_det'
    elif self.td:
        # Build detection model
        if not det_config:
            det_config = os.path.join(config_dir, 'textdet/',
                                      textdet_models[self.td]['config'])
        if not det_ckpt:
            det_ckpt = 'https://download.openmmlab.com/mmocr/textdet/' + \
                textdet_models[self.td]['ckpt']

        self.detect_model = init_detector(
            det_config, det_ckpt, device=self.device)
        self.detect_model = revert_sync_batchnorm(self.detect_model)

    self.recog_model = None
    if self.tr and self.tr == 'Tesseract':
        if tesserocr is None:
            raise ImportError('Please install tesserocr first. '
                              'Check out the installation guide at '
                              'https://github.com/sirfz/tesserocr')
        self.recog_model = 'Tesseract_recog'
    elif self.tr:
        # Build recognition model
        if not recog_config:
            recog_config = os.path.join(
                config_dir, 'textrecog/', textrecog_models[self.tr]['config'])
        if not recog_ckpt:
            recog_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                'textrecog/' + textrecog_models[self.tr]['ckpt']

        self.recog_model = init_detector(
            recog_config, recog_ckpt, device=self.device)
        self.recog_model = revert_sync_batchnorm(self.recog_model)

    self.kie_model = None
    if self.kie:
        # Build key information extraction model
        if not kie_config:
            kie_config = os.path.join(config_dir, 'kie/',
                                      kie_models[self.kie]['config'])
        if not kie_ckpt:
            kie_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                'kie/' + kie_models[self.kie]['ckpt']

        kie_cfg = Config.fromfile(kie_config)
        self.kie_model = build_detector(
            kie_cfg.model, test_cfg=kie_cfg.get('test_cfg'))
        self.kie_model = revert_sync_batchnorm(self.kie_model)
        self.kie_model.cfg = kie_cfg
        load_checkpoint(self.kie_model, kie_ckpt, map_location=self.device)

    # Attribute check
    for model in list(filter(None, [self.recog_model, self.detect_model])):
        if hasattr(model, 'module'):
            model = model.module
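# Usage sketch for this constructor, assuming it belongs to MMOCR's
# end-to-end inference class (mmocr.utils.ocr.MMOCR in MMOCR 0.x, whose
# readtext() is the companion API):
#
#   ocr = MMOCR(det='DB_r18', recog='CRNN')
#   results = ocr.readtext('demo/demo_text_det.jpg', output='outputs/')
#
# Passing det='Tesseract' or recog='Tesseract' skips checkpoint download and
# routes inference through tesserocr instead of an MMOCR model.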
def _file2dict(filename):
    if 'http' in filename:
        # Download a remote config to a local temp .py file first
        temp_filename = _gettemp_pyfile()
        assert filename.endswith('.py')
        result = os.system(f'wget {filename} -O {temp_filename}')
        assert result == 0, f'Cannot download: {filename}'
        # Update filename
        filename = temp_filename

    filename = os.path.abspath(os.path.expanduser(filename))
    check_file_exist(filename)
    if filename.endswith('.py'):
        with tempfile.TemporaryDirectory() as temp_config_dir:
            temp_config_file = tempfile.NamedTemporaryFile(
                dir=temp_config_dir, suffix='.py')
            temp_config_name = os.path.basename(temp_config_file.name)
            shutil.copyfile(filename,
                            os.path.join(temp_config_dir, temp_config_name))
            temp_module_name = os.path.splitext(temp_config_name)[0]
            sys.path.insert(0, temp_config_dir)
            Config._validate_py_syntax(filename)
            mod = import_module(temp_module_name)
            sys.path.pop(0)
            cfg_dict = {
                name: value
                for name, value in mod.__dict__.items()
                if not name.startswith('__')
            }
            # delete imported module
            del sys.modules[temp_module_name]
            # close temp file
            temp_config_file.close()
    elif filename.endswith(('.yml', '.yaml', '.json')):
        import mmcv
        cfg_dict = mmcv.load(filename)
    else:
        raise IOError('Only py/yml/yaml/json type are supported now!')

    cfg_text = filename + '\n'
    with open(filename, 'r') as f:
        cfg_text += f.read()

    if BASE_KEY in cfg_dict:
        cfg_dir = os.path.dirname(filename)
        base_filename = cfg_dict.pop(BASE_KEY)
        base_filename = base_filename if isinstance(
            base_filename, list) else [base_filename]

        cfg_dict_list = list()
        cfg_text_list = list()
        for f in base_filename:
            # base configs may themselves be remote URLs
            base_path = os.path.join(cfg_dir, f) if 'http' not in f else f
            _cfg_dict, _cfg_text = PublicConfig._file2dict(base_path)
            cfg_dict_list.append(_cfg_dict)
            cfg_text_list.append(_cfg_text)

        base_cfg_dict = dict()
        for c in cfg_dict_list:
            if len(base_cfg_dict.keys() & c.keys()) > 0:
                raise KeyError('Duplicate key is not allowed among bases')
            base_cfg_dict.update(c)

        base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict)
        cfg_dict = base_cfg_dict

        # merge cfg_text
        cfg_text_list.append(cfg_text)
        cfg_text = '\n'.join(cfg_text_list)

    return cfg_dict, cfg_text
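# Usage sketch: thanks to the wget branch above, _file2dict() also accepts
# remote .py configs. PublicConfig is the enclosing class implied by the
# recursive call; the URL is illustrative.
#
#   cfg_dict, cfg_text = PublicConfig._file2dict(
#       'https://example.com/configs/retinanet_r50_fpn_1x.py')
#
# A _base_ entry in the downloaded file may itself be an http(s) URL, which
# the same branch resolves on recursion.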
def test_liif():
    model_cfg = dict(
        type='LIIF',
        generator=dict(
            type='LIIFEDSR',
            encoder=dict(
                type='EDSR',
                in_channels=3,
                out_channels=3,
                mid_channels=64,
                num_blocks=16),
            imnet=dict(
                type='MLPRefiner',
                in_dim=64,
                out_dim=3,
                hidden_list=[256, 256, 256, 256]),
            local_ensemble=True,
            feat_unfold=True,
            cell_decode=True,
            eval_bsize=30000),
        rgb_mean=(0.4488, 0.4371, 0.4040),
        rgb_std=(1., 1., 1.),
        pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'))

    scale_max = 4
    train_cfg = None
    test_cfg = Config(dict(metrics=['PSNR', 'SSIM'], crop_border=scale_max))

    # build restorer
    restorer = build_model(model_cfg, train_cfg=train_cfg, test_cfg=test_cfg)

    # test attributes
    assert restorer.__class__.__name__ == 'LIIF'

    # prepare data
    inputs = torch.rand(1, 3, 22, 11)
    targets = torch.rand(1, 128 * 64, 3)
    coord = torch.rand(1, 128 * 64, 2)
    cell = torch.rand(1, 128 * 64, 2)
    data_batch = {'lq': inputs, 'gt': targets, 'coord': coord, 'cell': cell}

    # prepare optimizer
    optim_cfg = dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))
    optimizer = obj_from_dict(optim_cfg, torch.optim,
                              dict(params=restorer.parameters()))

    # test train_step and forward_test (cpu)
    outputs = restorer.train_step(data_batch, optimizer)
    assert isinstance(outputs, dict)
    assert isinstance(outputs['log_vars'], dict)
    assert isinstance(outputs['log_vars']['loss_pix'], float)
    assert outputs['num_samples'] == 1
    assert outputs['results']['lq'].shape == data_batch['lq'].shape
    assert outputs['results']['gt'].shape == data_batch['gt'].shape
    assert torch.is_tensor(outputs['results']['output'])
    assert outputs['results']['output'].size() == (1, 128 * 64, 3)

    # test train_step and forward_test (gpu)
    if torch.cuda.is_available():
        restorer = restorer.cuda()
        data_batch = {
            'lq': inputs.cuda(),
            'gt': targets.cuda(),
            'coord': coord.cuda(),
            'cell': cell.cuda()
        }

        # train_step
        optimizer = obj_from_dict(optim_cfg, torch.optim,
                                  dict(params=restorer.parameters()))
        outputs = restorer.train_step(data_batch, optimizer)
        assert isinstance(outputs, dict)
        assert isinstance(outputs['log_vars'], dict)
        assert isinstance(outputs['log_vars']['loss_pix'], float)
        assert outputs['num_samples'] == 1
        assert outputs['results']['lq'].shape == data_batch['lq'].shape
        assert outputs['results']['gt'].shape == data_batch['gt'].shape
        assert torch.is_tensor(outputs['results']['output'])
        assert outputs['results']['output'].size() == (1, 128 * 64, 3)

        # val_step
        result = restorer.val_step(data_batch, meta=[{'gt_path': ''}])
        assert isinstance(result, dict)
        assert isinstance(result['eval_result'], dict)
        assert result['eval_result'].keys() == set({'PSNR', 'SSIM'})
        assert isinstance(result['eval_result']['PSNR'], np.float64)
        assert isinstance(result['eval_result']['SSIM'], np.float64)
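# Why coord/cell have shape (1, H*W, 2): LIIF queries its decoder at the
# normalized pixel-center coordinates of the target grid, together with each
# query's cell size. The test substitutes random tensors; a sketch of building
# real ones for a 128x64 target (helper name is hypothetical):
def make_coord_cell(h, w):
    ys = (torch.arange(h).float() + 0.5) / h * 2 - 1   # pixel centers in [-1, 1]
    xs = (torch.arange(w).float() + 0.5) / w * 2 - 1
    coord = torch.stack(torch.meshgrid(ys, xs), dim=-1).view(1, h * w, 2)
    cell = torch.ones_like(coord)
    cell[..., 0] *= 2 / h   # cell height in normalized units
    cell[..., 1] *= 2 / w   # cell width in normalized units
    return coord, cell

# coord, cell = make_coord_cell(128, 64)  # matches gt of shape (1, 128*64, 3)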
def __init__(self,
             det='PANet_IC15',
             det_config='',
             det_ckpt='',
             recog='SEG',
             recog_config='',
             recog_ckpt='',
             kie='',
             kie_config='',
             kie_ckpt='',
             config_dir=os.path.join(str(Path.cwd()), 'configs/'),
             device='cuda:0',
             **kwargs):

    textdet_models = {
        'DB_r18': {
            'config': 'dbnet/dbnet_r18_fpnc_1200e_icdar2015.py',
            'ckpt': 'dbnet/'
            'dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth'
        },
        'DB_r50': {
            'config': 'dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py',
            'ckpt': 'dbnet/'
            'dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20210325-91cef9af.pth'
        },
        'DRRG': {
            'config': 'drrg/drrg_r50_fpn_unet_1200e_ctw1500.py',
            'ckpt': 'drrg/drrg_r50_fpn_unet_1200e_ctw1500-1abf4f67.pth'
        },
        'FCE_IC15': {
            'config': 'fcenet/fcenet_r50_fpn_1500e_icdar2015.py',
            'ckpt': 'fcenet/fcenet_r50_fpn_1500e_icdar2015-d435c061.pth'
        },
        'FCE_CTW_DCNv2': {
            'config': 'fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py',
            'ckpt': 'fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500-05d740bb.pth'
        },
        'MaskRCNN_CTW': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth'
        },
        'MaskRCNN_IC15': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth'
        },
        'MaskRCNN_IC17': {
            'config': 'maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py',
            'ckpt': 'maskrcnn/'
            'mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.pth'
        },
        'PANet_CTW': {
            'config': 'panet/panet_r18_fpem_ffm_600e_ctw1500.py',
            'ckpt': 'panet/'
            'panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.pth'
        },
        'PANet_IC15': {
            'config': 'panet/panet_r18_fpem_ffm_600e_icdar2015.py',
            'ckpt': 'panet/'
            'panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth'
        },
        'PS_CTW': {
            'config': 'psenet/psenet_r50_fpnf_600e_ctw1500.py',
            'ckpt': 'psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth'
        },
        'PS_IC15': {
            'config': 'psenet/psenet_r50_fpnf_600e_icdar2015.py',
            'ckpt': 'psenet/psenet_r50_fpnf_600e_icdar2015_pretrain-eefd8fe6.pth'
        },
        'TextSnake': {
            'config': 'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py',
            'ckpt': 'textsnake/textsnake_r50_fpn_unet_1200e_ctw1500-27f65b64.pth'
        }
    }

    textrecog_models = {
        'CRNN': {
            'config': 'crnn/crnn_academic_dataset.py',
            'ckpt': 'crnn/crnn_academic-a723a1c5.pth'
        },
        'SAR': {
            'config': 'sar/sar_r31_parallel_decoder_academic.py',
            'ckpt': 'sar/sar_r31_parallel_decoder_academic-dba3a4a3.pth'
        },
        'NRTR_1/16-1/8': {
            'config': 'nrtr/nrtr_r31_1by16_1by8_academic.py',
            'ckpt': 'nrtr/nrtr_r31_academic_20210406-954db95e.pth'
        },
        'NRTR_1/8-1/4': {
            'config': 'nrtr/nrtr_r31_1by8_1by4_academic.py',
            'ckpt': 'nrtr/nrtr_r31_1by8_1by4_academic_20210406-ce16e7cc.pth'
        },
        'RobustScanner': {
            'config': 'robust_scanner/robustscanner_r31_academic.py',
            'ckpt': 'robustscanner/robustscanner_r31_academic-5f05874f.pth'
        },
        'SEG': {
            'config': 'seg/seg_r31_1by16_fpnocr_academic.py',
            'ckpt': 'seg/seg_r31_1by16_fpnocr_academic-72235b11.pth'
        },
        'CRNN_TPS': {
            'config': 'tps/crnn_tps_academic_dataset.py',
            'ckpt': 'tps/crnn_tps_academic_dataset_20210510-d221a905.pth'
        }
    }

    kie_models = {
        'SDMGR': {
            'config': 'sdmgr/sdmgr_unet16_60e_wildreceipt.py',
            'ckpt': 'sdmgr/sdmgr_unet16_60e_wildreceipt_20210520-7489e6de.pth'
        }
    }

    self.td = det
    self.tr = recog
    self.kie = kie
    self.device = device

    # Check if the det/recog model choice is valid
    if self.td and self.td not in textdet_models:
        raise ValueError(self.td,
                         'is not a supported text detection algorithm')
    elif self.tr and self.tr not in textrecog_models:
        raise ValueError(self.tr,
                         'is not a supported text recognition algorithm')
    elif self.kie and self.kie not in kie_models:
        raise ValueError(
            self.kie, 'is not a supported key information extraction'
            ' algorithm')

    self.detect_model = None
    if self.td:
        # Build detection model
        if not det_config:
            det_config = os.path.join(config_dir, 'textdet/',
                                      textdet_models[self.td]['config'])
        if not det_ckpt:
            det_ckpt = 'https://download.openmmlab.com/mmocr/textdet/' + \
                textdet_models[self.td]['ckpt']

        self.detect_model = init_detector(
            det_config, det_ckpt, device=self.device)

    self.recog_model = None
    if self.tr:
        # Build recognition model
        if not recog_config:
            recog_config = os.path.join(
                config_dir, 'textrecog/', textrecog_models[self.tr]['config'])
        if not recog_ckpt:
            recog_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                'textrecog/' + textrecog_models[self.tr]['ckpt']

        self.recog_model = init_detector(
            recog_config, recog_ckpt, device=self.device)

    self.kie_model = None
    if self.kie:
        # Build key information extraction model
        if not kie_config:
            kie_config = os.path.join(config_dir, 'kie/',
                                      kie_models[self.kie]['config'])
        if not kie_ckpt:
            kie_ckpt = 'https://download.openmmlab.com/mmocr/' + \
                'kie/' + kie_models[self.kie]['ckpt']

        kie_cfg = Config.fromfile(kie_config)
        self.kie_model = build_detector(
            kie_cfg.model, test_cfg=kie_cfg.get('test_cfg'))
        self.kie_model.cfg = kie_cfg
        load_checkpoint(self.kie_model, kie_ckpt, map_location=self.device)

    # Attribute check
    for model in list(filter(None, [self.recog_model, self.detect_model])):
        if hasattr(model, 'module'):
            model = model.module
        if model.cfg.data.test['type'] == 'ConcatDataset':
            model.cfg.data.test.pipeline = \
                model.cfg.data.test['datasets'][0].pipeline
if __name__ == "__main__":
    # Test backend
    backend = GCSBackend("determined-ai-coco-dataset")

    img_bytes = backend.get("annotations2017/instances_val2017.json")
    import os
    with open("/tmp/instances_val2017.json", "wb") as f:
        f.write(img_bytes)
    print("done")

    # Test dataloader
    from mmcv import Config
    from mmdet.models import build_detector
    from mmcv.runner import load_checkpoint

    cfg = Config.fromfile("configs/retinanet/retinanet_r50_fpn_1x_coco.py")
    sub_backend("gcs", cfg)
    cfg.data.val.ann_file = "/tmp/instances_val2017.json"
    cfg.data.val.test_mode = True

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    checkpoint = load_checkpoint(
        model, "./retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth")
    model.cuda()
    model.eval()

    dataset, data_loader = build_dataloader(cfg.data.val, 1, 1, 8, False)
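    # A sketch of consuming data_loader for evaluation (standard mmdet test
    # semantics; wrapping in MMDataParallel handles DataContainer scatter):
    #
    #   import torch
    #   from mmcv.parallel import MMDataParallel
    #   model = MMDataParallel(model, device_ids=[0])
    #   results = []
    #   with torch.no_grad():
    #       for data in data_loader:
    #           results.extend(model(return_loss=False, rescale=True, **data))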