def mnist(root_dir, train):
    # Normalize the training set with augmentation
    transform_train = transforms.Compose([
        transforms.Resize((32, 32)),  # MNIST images are 28x28; resize before cropping
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.406,), (0.225,), inplace=True),  # single channel
    ])
    # Normalize the test set same as training set without augmentation
    transform_test = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.406,), (0.225,), inplace=True),
    ])

    # Install an opener with a browser User-Agent so the MNIST download
    # is not rejected with HTTP 403.
    opener = urllib.request.build_opener()
    opener.addheaders = [('User-Agent',
                          'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/35.0.1916.47 Safari/537.36')]
    urllib.request.install_opener(opener)

    dataset = MNIST(root=root_dir, train=train, download=True)
    return get_dataset(dataset, train, transform_test, transform_train)
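# `get_dataset` is not defined in these torchvision snippets; a minimal sketch
# of what it presumably does (the signature follows the calls above, the body
# is an assumption): attach the split-appropriate transform and return the
# dataset.
def get_dataset(dataset, train, transform_test, transform_train):
    # torchvision datasets apply `self.transform` inside __getitem__
    dataset.transform = transform_train if train else transform_test
    return dataset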
def main():
    config_path = './config/cfg_retinanet_r50_fpn_trafficsign_extra_aug.py'
    cfg = Config.fromfile(config_path)
    dataset = get_dataset(cfg.data.train, TrafficSign)
    ana = AnalyzeDataset('traffic_sign', dataset, checkonly=True)
    ana.imgcheck(1521)
def __init__(self, model, img_path, use_cuda=True):
    self.model = model
    self.img = cv2.imread(img_path, 1)  # read as BGR
    self.use_cuda = use_cuda
    self.cfg_path = '../config/cfg_ssd300_vgg16_voc.py'
    self.cfg = Config.fromfile(self.cfg_path)
    self.cfg.data_root = '../data/VOCdevkit/'
    # train
    self.cfg.data.train.dataset.ann_file = [
        self.cfg.data_root + 'VOC2007/ImageSets/Main/trainval.txt',
        self.cfg.data_root + 'VOC2012/ImageSets/Main/trainval.txt'
    ]
    self.cfg.data.train.dataset.img_prefix = [
        self.cfg.data_root + 'VOC2007/',
        self.cfg.data_root + 'VOC2012/'
    ]
    # val
    self.cfg.data.val.ann_file = self.cfg.data_root + 'VOC2007/ImageSets/Main/test.txt'
    self.cfg.data.val.img_prefix = self.cfg.data_root + 'VOC2007/'
    # test
    self.cfg.data.test.ann_file = self.cfg.data_root + 'VOC2007/ImageSets/Main/test.txt'
    self.cfg.data.test.img_prefix = self.cfg.data_root + 'VOC2007/'

    self.dataset = get_dataset(self.cfg.data.train, VOCDataset)

    # img preprocess: resize and scale to [0, 1]
    self.img = np.float32(cv2.resize(self.img, (224, 224))) / 255
    self.img = self.preprocess_image()  # (1, c, h, w)
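# `preprocess_image` is called above but not shown; a minimal sketch under
# common Grad-CAM-style assumptions (ImageNet mean/std; the BGR float image
# in [0, 1] produced by the cv2 preprocessing above):
def preprocess_image(self):
    means = np.array([0.485, 0.456, 0.406])
    stds = np.array([0.229, 0.224, 0.225])
    img = (self.img[..., ::-1] - means) / stds  # BGR -> RGB, then normalize
    img = img.transpose(2, 0, 1)[None, ...]     # HWC -> (1, c, h, w)
    return torch.from_numpy(np.ascontiguousarray(img)).float().requires_grad_(True)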
def train(cfg_path, dataset_class):
    """Train with mmcv's Runner framework, using its hooks for lr updates
    and loss computation.
    1. The dataset outputs img/gt_bbox/label, wrapped in DataContainer.
    2. The DataLoader's default_collate is replaced with a custom collate,
       so the dataset's mixed data types are supported.
    3. The DataParallel wrapper is replaced with a custom MMDataparallel,
       so DataContainer is supported.
    """
    # two default options
    distributed = False
    parallel = True

    # get cfg
    cfg = Config.fromfile(cfg_path)

    # set backends
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # get logger
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('DataParallel training: {}'.format(parallel))

    # build model & detector
    model = OneStageDetector(cfg)
    if not parallel:
        model = model.cuda()
    else:
        model = NNDataParallel(model, device_ids=range(cfg.gpus)).cuda()

    # prepare data & dataloader
    # Runner expects the dataloaders in a list: one per flow in the workflow
    dataset = get_dataset(cfg.data.train, dataset_class)
    batch_size = cfg.gpus * cfg.data.imgs_per_gpu
    num_workers = cfg.gpus * cfg.data.workers_per_gpu
    dataloader = [
        DataLoader(dataset,
                   batch_size=batch_size,
                   sampler=GroupSampler(dataset, cfg.data.imgs_per_gpu),
                   num_workers=num_workers,
                   collate_fn=partial(collate, samples_per_gpu=cfg.data.imgs_per_gpu),
                   pin_memory=False)
    ]

    # define runner and running type (1. resume, 2. load, 3. train/test)
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, cfg.log_level)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    if cfg.resume_from:  # resume training, e.g. './work_dirs/ssd300_voc/latest.pth'
        runner.resume(cfg.resume_from, map_location=lambda storage, loc: storage)
    elif cfg.load_from:  # load weights for testing
        runner.load_checkpoint(cfg.load_from)

    # start running: the workflow distinguishes train from test
    runner.run(dataloader, cfg.workflow, cfg.total_epochs)
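# `batch_processor` is passed to Runner above but not defined in this snippet;
# mmcv's Runner calls it as batch_processor(model, data, train_mode) and
# expects a dict with 'loss', 'log_vars' and 'num_samples'. A minimal sketch,
# assuming the detector's forward() returns a dict of loss tensors:
def batch_processor(model, data, train_mode):
    losses = model(**data)
    loss = sum(v.mean() for v in losses.values())
    log_vars = {k: v.mean().item() for k, v in losses.items()}
    return dict(loss=loss, log_vars=log_vars,
                num_samples=len(data['img'].data))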
def preprocess_data(self, dataset_class):
    self.dataset = get_dataset(self.cfg.data.test, dataset_class)
    self.dataloader = build_dataloader(
        self.dataset,
        imgs_per_gpu=1,
        workers_per_gpu=self.cfg.data.workers_per_gpu,
        num_gpus=1,
        dist=False,
        shuffle=False)
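# Example of consuming the loader built above, mirroring the test-mode forward
# used by single_test() in the evaluation script below (a sketch; the method
# name and the rescale flag are assumptions):
def run_inference(self):
    results = []
    for data in self.dataloader:
        with torch.no_grad():
            result = self.model(return_loss=False, rescale=True, **data)
        results.append(result)
    return results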
def test_voc_dataset():
    data_root = '../data/VOCdevkit/'  # the data dir under the ssd project root
    img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
    cfg_train = dict(
        type='RepeatDataset',
        times=10,
        dataset=dict(
            type='VOCDataset',
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            img_scale=(300, 300),
            img_norm_cfg=img_norm_cfg,
            size_divisor=None,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=False,
            with_label=True,
            test_mode=False,
            extra_aug=dict(
                photo_metric_distortion=dict(brightness_delta=32,
                                             contrast_range=(0.5, 1.5),
                                             saturation_range=(0.5, 1.5),
                                             hue_delta=18),
                expand=dict(mean=img_norm_cfg['mean'],
                            to_rgb=img_norm_cfg['to_rgb'],
                            ratio_range=(1, 4)),
                random_crop=dict(min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
                                 min_crop_size=0.3)),
            resize_keep_ratio=False))
    trainset = get_dataset(cfg_train, VOCDataset)
    classes = trainset.CLASSES

    data = trainset[1120]  # dict with 'img', 'img_meta', ...
    """Preprocessing already applied: RGB conversion, CHW layout, normalization, tensor conversion."""
    bbox = data['gt_bboxes'].data.numpy()
    label = data['gt_labels'].data.numpy()
    img = data['img'].data.numpy()  # undo tensor
    img1 = img.transpose(1, 2, 0)   # undo CHW
    img2 = np.clip(
        (img1 * img_norm_cfg['std'] + img_norm_cfg['mean']).astype(np.int32),
        0, 255)                     # undo normalization
    # plt.imshow(img2)
    # vis_bbox converts BGR to RGB internally, so feed it BGR ([2, 1, 0] is RGB -> BGR)
    vis_bbox(img2[..., [2, 1, 0]], bbox, label - 1, label_names=classes)
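# The three "undo" steps above recur in test_dataloader() below; a small
# helper capturing them (a sketch; the helper name is hypothetical and the
# img_norm_cfg keys are assumed to match the cfgs in this file):
def tensor_to_bgr_image(img_tensor, img_norm_cfg):
    img = img_tensor.numpy().transpose(1, 2, 0)             # undo CHW
    img = img * img_norm_cfg['std'] + img_norm_cfg['mean']  # undo normalization
    img = np.clip(img.astype(np.int32), 0, 255)
    return img[..., [2, 1, 0]]                              # RGB -> BGR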
def cifar10(root_dir, train):
    # Normalize the training set with augmentation
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    # Normalize the test set same as training set without augmentation
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    dataset = CIFAR10(root=root_dir, train=train, download=True)
    return get_dataset(dataset, train, transform_test, transform_train)
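# Example usage (a sketch; the root dir, batch size and worker count are
# arbitrary, and DataLoader comes from torch.utils.data):
if __name__ == '__main__':
    trainset = cifar10('./data', train=True)
    loader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
    images, labels = next(iter(loader))
    print(images.shape)  # torch.Size([128, 3, 32, 32])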
def test_dataloader():
    data_root = '../data/VOCdevkit/'  # the data dir under the ssd project root
    img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
    cfg_train = dict(
        type='RepeatDataset',
        times=10,
        dataset=dict(
            type='VOCDataset',
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            img_scale=(300, 300),
            img_norm_cfg=img_norm_cfg,
            size_divisor=None,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=False,
            with_label=True,
            test_mode=False,
            extra_aug=dict(
                photo_metric_distortion=dict(brightness_delta=32,
                                             contrast_range=(0.5, 1.5),
                                             saturation_range=(0.5, 1.5),
                                             hue_delta=18),
                expand=dict(mean=img_norm_cfg['mean'],
                            to_rgb=img_norm_cfg['to_rgb'],
                            ratio_range=(1, 4)),
                random_crop=dict(min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
                                 min_crop_size=0.3)),
            resize_keep_ratio=False))
    trainset = get_dataset(cfg_train, VOCDataset)
    # build the loader the same way train() does: GroupSampler plus the
    # custom collate (imgs_per_gpu=2 is an arbitrary choice for this test)
    dataloader = DataLoader(
        trainset,
        batch_size=2,
        sampler=GroupSampler(trainset, 2),
        num_workers=2,
        collate_fn=partial(collate, samples_per_gpu=2),
        pin_memory=False)
    data_batch = next(iter(dataloader))
dataset_type = 'TrafficSign'        # switched to trafficsign
data_root = './data/traffic_sign/'  # switched to trafficsign
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
                    std=[58.395, 57.12, 57.375],
                    to_rgb=True)
trainset_cfg = dict(
    type=dataset_type,
    ann_file=data_root + 'train_label_fix.csv',
    img_prefix=data_root + 'Train_fix/',
    # img_scale=(1333, 800),  # shrink the images
    img_scale=(3200, 1800),   # keep the original image size
    img_norm_cfg=img_norm_cfg,
    size_divisor=32,
    with_label=True,
    extra_aug=None)

dataset = get_dataset(trainset_cfg, TrafficSign)
ana = AnalyzeDataset('traffic_sign', dataset, checkonly=False)
# ana.cluster_bbox(show=True)
ana.types_bin(show=True)
ana.bbox_size(show=True)
# ana.imgcheck(2)

if dset == 'voc':
    dataset_type = 'VOCDataset'
    data_root = './data/VOCdevkit/'
    img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True)
    trainset_cfg = dict(
        type=dataset_type,
        ann_file=[
def main():
    """Minor tweaks for evaluating faster rcnn on voc:
    1. argument parsing is replaced with direct assignments
    """
    # note: cfg and checkpoint must match, since class counts differ across datasets
    config_path = './config/cfg_ssd300_vgg16_voc.py'
    checkpoint_path = './weights/myssd/epoch_24.pth'
    cfg = mmcv.Config.fromfile(config_path)
    # must be a pkl, not a json: outputs contain ndarrays, which json cannot serialize
    out_file = 'dataset_eval_result/results.pkl'
    eval_type = ['bbox']  # proposal_fast is mmdetection's own implementation
    # eval_type = ['proposal', 'bbox']  # these use the coco api, from ['proposal', 'bbox', 'segm', 'keypoints']; verified working
    show_result = False  # set show=True to display each image's result in turn (practical for small datasets)

    if not out_file.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # cfg.model.pretrained = None

    # dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    dataset = get_dataset(cfg.data.test, CocoDataset)

    cfg.gpus = 1
    if cfg.gpus == 1:
        model = OneStageDetector(cfg)
        load_checkpoint(model, checkpoint_path)
        model = MMDataParallel(model, device_ids=[0])

        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            num_gpus=1,
            dist=False,
            shuffle=False)
        outputs = single_test(model, data_loader, show=show_result)
        # outputs structure: [img1, ..., imgn], len=5000, i.e. every image in coco val
        # each img: [cls1, ..., clsn], len=80, the bbox predictions for every class
        # each cls: ndarray(n, 5), the n bboxes for that class (n=0 if none predicted);
        #           columns are x1, y1, x2, y2, score
        # note: the innermost ndarrays cannot be written to json directly;
        #       convert them with data.tolist()
    else:
        model_args = cfg.model.copy()
        model_args.update(train_cfg=None, test_cfg=cfg.test_cfg)
        model_type = getattr(detectors, model_args.pop('type'))
        outputs = parallel_test(
            model_type,
            model_args,
            checkpoint_path,
            dataset,
            _data_func,
            range(cfg.gpus),
            workers_per_gpu=cfg.proc_per_gpu)

    # debug
    if out_file:
        print('writing results to {}'.format(out_file))
        # dump the test results to file first (created if it does not exist)
        mmcv.dump(outputs, out_file)
        eval_types = eval_type
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                result_file = out_file
                # coco_eval(result_file, eval_types, dataset.coco)
                # use our own evaluation() instead
                evaluation(result_file, dataset.coco, eval_types=eval_types)
            else:
                if not isinstance(outputs[0], dict):
                    result_file = out_file + '.json'
                    results2json(dataset, outputs, result_file)
                    # coco_eval(result_file, eval_types, dataset.coco)
                    # use our own evaluation() instead
                    evaluation(result_file, dataset.coco, eval_types=eval_types)
                else:
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = out_file + '.{}.json'.format(name)
                        results2json(dataset, outputs_, result_file)
                        coco_eval(result_file, eval_types, dataset.coco)
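# As the comment on the outputs structure notes, the innermost ndarrays cannot
# be written to json directly; a minimal sketch of the tolist() conversion
# (the helper name is hypothetical):
def outputs_to_jsonable(outputs):
    # [imgs][classes] ndarray(n, 5) -> nested lists that json can serialize
    return [[cls_det.tolist() for cls_det in img_dets] for img_dets in outputs]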