Example 1
def mnist(root_dir, train):
    # Normalize the training set with augmentation
    transform_train = transforms.Compose(
        [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Resize((32, 32)),
            transforms.Normalize(0.406, 0.225, inplace=True)
        ]
    )

    # Normalize the test set same as training set without augmentation
    transform_test = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Resize((32, 32)),
            transforms.Normalize(0.406, 0.225, inplace=True)
        ]
    )

    opener = urllib.request.URLopener()
    opener.addheader('User-Agent',
                     'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 '
                     '(KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36')

    dataset = MNIST(root=root_dir, train=train, download=True)
    return get_dataset(dataset, train, transform_test, transform_train)
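
Note: the URLopener above sets a browser User-Agent but is never wired into the MNIST download, so the header has no effect as written. A minimal fix, assuming the intent is to work around servers that reject Python's default agent, is to install a global opener before constructing the dataset:

import urllib.request

opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent',
                      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36')]
# install_opener makes subsequent urllib requests (including torchvision's
# downloader in older releases) send these headers
urllib.request.install_opener(opener)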
Example 2
def main():
    config_path = './config/cfg_retinanet_r50_fpn_trafficsign_extra_aug.py'
    cfg = Config.fromfile(config_path)
    dataset = get_dataset(cfg.data.train, TrafficSign)
    ana = AnalyzeDataset('traffic_sign', dataset, checkonly=True)
    
    ana.imgcheck(1521)
Example 3
    def __init__(self, model, img_path, use_cuda=True):
        self.model = model
        self.img = cv2.imread(img_path, 1)
        self.use_cuda = use_cuda
        self.cfg_path = '../config/cfg_ssd300_vgg16_voc.py'
        self.cfg = Config.fromfile(self.cfg_path)
        self.cfg.data_root = '../data/VOCdevkit/'
        # train
        self.cfg.data.train.dataset.ann_file = [
            self.cfg.data_root + 'VOC2007/ImageSets/Main/trainval.txt',
            self.cfg.data_root + 'VOC2012/ImageSets/Main/trainval.txt'
        ]
        self.cfg.data.train.dataset.img_prefix = [
            self.cfg.data_root + 'VOC2007/', self.cfg.data_root + 'VOC2012/'
        ]
        # val
        self.cfg.data.val.ann_file = self.cfg.data_root + 'VOC2007/ImageSets/Main/test.txt'
        self.cfg.data.val.img_prefix = self.cfg.data_root + 'VOC2007/'
        # test
        self.cfg.data.test.ann_file = self.cfg.data_root + 'VOC2007/ImageSets/Main/test.txt'
        self.cfg.data.test.img_prefix = self.cfg.data_root + 'VOC2007/'

        self.dataset = get_dataset(self.cfg.data.train, VOCDataset)

        # img preprocess
        self.img = np.float32(cv2.resize(self.img,
                                         (224, 224))) / 255  # resize and scale to [0, 1]
        self.img = self.preprocess_image()  # (1, c, h, w)
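
preprocess_image() is referenced but not defined in this snippet. A minimal sketch consistent with the (1, c, h, w) comment, assuming ImageNet statistics and the BGR image already scaled to [0, 1] above, might look like:

import numpy as np
import torch

def preprocess_image(self):
    # hypothetical implementation: BGR -> RGB, ImageNet normalization, HWC -> CHW, batch dim
    img = self.img[..., ::-1].copy()  # cv2 loads BGR; convert to RGB
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    img = (img - mean) / std
    tensor = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)  # (1, c, h, w)
    return tensor.cuda() if self.use_cuda else tensor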
Example 4
def train(cfg_path, dataset_class):
    """借用mmcv的Runner框架进行训练,包括里边的hooks作为lr更新,loss计算的工具
    1. dataset的数据集输出打包了img/gt_bbox/label/,采用DataContainer封装
    2. Dataloader的default_collate用定制collate替换,从而支持dataset的多类型数据
    3. DataParallel外壳用定制MMDataparallel替换,从而支持DataContainer
    """
    # 初始化2个默认选项
    distributed = False
    parallel = True

    # get cfg
    cfg = Config.fromfile(cfg_path)

    # set backends
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # get logger
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('DataParallel training: {}'.format(parallel))
    # build model & detector
    model = OneStageDetector(cfg)
    if not parallel:
        model = model.cuda()
    else:
        model = NNDataParallel(model, device_ids=range(cfg.gpus)).cuda()

    # prepare data & dataloader
    # Runner expects the dataloaders in a list: each flow in workflow gets its own dataloader
    dataset = get_dataset(cfg.data.train, dataset_class)
    batch_size = cfg.gpus * cfg.data.imgs_per_gpu
    num_workers = cfg.gpus * cfg.data.workers_per_gpu
    dataloader = [
        DataLoader(dataset,
                   batch_size=batch_size,
                   sampler=GroupSampler(dataset, cfg.data.imgs_per_gpu),
                   num_workers=num_workers,
                   collate_fn=partial(collate,
                                      samples_per_gpu=cfg.data.imgs_per_gpu),
                   pin_memory=False)
    ]

    # define runner and running type(1.resume, 2.load, 3.train/test)
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    if cfg.resume_from:  # resume training, e.g. from './work_dirs/ssd300_voc/latest.pth'
        runner.resume(cfg.resume_from,
                      map_location=lambda storage, loc: storage)
    elif cfg.load_from:  # load weights for testing
        runner.load_checkpoint(cfg.load_from)
    # start running: the workflow setting distinguishes train from test
    runner.run(dataloader, cfg.workflow, cfg.total_epochs)
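
A hypothetical invocation of train(), assuming the VOC config and dataset class used elsewhere in these examples:

if __name__ == '__main__':
    # config path and dataset class are assumptions; pass any matching cfg/dataset pair
    train('./config/cfg_ssd300_vgg16_voc.py', VOCDataset)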
Example 5
    def preprocess_data(self, dataset_class):
        self.dataset = get_dataset(self.cfg.data.test, dataset_class)
        self.dataloader = build_dataloader(
            self.dataset,
            imgs_per_gpu=1,
            workers_per_gpu=self.cfg.data.workers_per_gpu,
            num_gpus=1,
            dist=False,
            shuffle=False)
Example 6
def test_voc_dataset():
    data_root = '../data/VOCdevkit/'  # refers to the data directory under the ssd project root
    img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
                        std=[1, 1, 1],
                        to_rgb=True)
    cfg_train = dict(
        type='RepeatDataset',
        times=10,
        dataset=dict(
            type='VOCDataset',
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            img_scale=(300, 300),
            img_norm_cfg=img_norm_cfg,
            size_divisor=None,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=False,
            with_label=True,
            test_mode=False,
            extra_aug=dict(
                photo_metric_distortion=dict(brightness_delta=32,
                                             contrast_range=(0.5, 1.5),
                                             saturation_range=(0.5, 1.5),
                                             hue_delta=18),
                expand=dict(mean=img_norm_cfg['mean'],
                            to_rgb=img_norm_cfg['to_rgb'],
                            ratio_range=(1, 4)),
                random_crop=dict(min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
                                 min_crop_size=0.3)),
            resize_keep_ratio=False))

    trainset = get_dataset(cfg_train, VOCDataset)
    classes = trainset.CLASSES
    data = trainset[1120]  # dict('img', 'img_meta', )
    """已做的数据处理:rgb化,chw化,归一化,tensor化"""
    bbox = data['gt_bboxes'].data.numpy()
    label = data['gt_labels'].data.numpy()
    img = data['img'].data.numpy()  # 逆tensor
    img1 = img.transpose(1, 2, 0)  # 逆chw
    img2 = np.clip(
        (img1 * img_norm_cfg['std'] + img_norm_cfg['mean']).astype(np.int32),
        0, 255)  # 逆归一
    #        plt.imshow(img2)
    vis_bbox(img2[..., [2, 0, 1]], bbox, label - 1,
             label_names=classes)  # vis_bbox内部会bgr转rgb,所以这里要用bgr输入
Example 7
def cifar10(root_dir, train):
    # Normalize the training set with augmentation
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    # Normalize the test set same as training set without augmentation
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    dataset = CIFAR10(root=root_dir, train=train, download=True)
    return get_dataset(dataset, train, transform_test, transform_train)
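
A short usage sketch; the exact return value depends on this repository's get_dataset helper:

train_set = cifar10('./data', train=True)   # downloads CIFAR10 to ./data on first run
test_set = cifar10('./data', train=False)   # same normalization, no augmentation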
Example 8
def test_dataloader():
    data_root = '../data/VOCdevkit/'  # refers to the data directory under the ssd project root
    img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
                        std=[1, 1, 1],
                        to_rgb=True)
    cfg_train = dict(
        type='RepeatDataset',
        times=10,
        dataset=dict(
            type='VOCDataset',
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            img_scale=(300, 300),
            img_norm_cfg=img_norm_cfg,
            size_divisor=None,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=False,
            with_label=True,
            test_mode=False,
            extra_aug=dict(
                photo_metric_distortion=dict(brightness_delta=32,
                                             contrast_range=(0.5, 1.5),
                                             saturation_range=(0.5, 1.5),
                                             hue_delta=18),
                expand=dict(mean=img_norm_cfg['mean'],
                            to_rgb=img_norm_cfg['to_rgb'],
                            ratio_range=(1, 4)),
                random_crop=dict(min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
                                 min_crop_size=0.3)),
            resize_keep_ratio=False))

    trainset = get_dataset(cfg_train, VOCDataset)
    # hypothetical completion (the source is truncated here): build a loader as in Example 4
    dataloader = DataLoader(trainset,
                            batch_size=2,
                            sampler=GroupSampler(trainset, 2),
                            num_workers=2,
                            collate_fn=partial(collate, samples_per_gpu=2),
                            pin_memory=False)
Example 9
    if dset == 'trafficsign':  # assumed guard, implied by the parallel 'voc' branch below
        dataset_type = 'TrafficSign'  # changed to trafficsign
        data_root = './data/traffic_sign/'  # changed to trafficsign
        img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
                            std=[58.395, 57.12, 57.375],
                            to_rgb=True)
        trainset_cfg = dict(
            type=dataset_type,
            ann_file=data_root + 'train_label_fix.csv',
            img_prefix=data_root + 'Train_fix/',
            #            img_scale=(1333, 800),  # shrink the image
            img_scale=(3200, 1800),  # keep the original image size
            img_norm_cfg=img_norm_cfg,
            size_divisor=32,
            with_label=True,
            extra_aug=None)
        dataset = get_dataset(trainset_cfg, TrafficSign)
        ana = AnalyzeDataset('traffic_sign', dataset, checkonly=False)
        #        ana.cluster_bbox(show=True)
        ana.types_bin(show=True)
        ana.bbox_size(show=True)
        #        ana.imgcheck(2)

    if dset == 'voc':
        dataset_type = 'VOCDataset'
        data_root = './data/VOCdevkit/'
        img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
                            std=[58.395, 57.12, 57.375],
                            to_rgb=True)
        trainset_cfg = dict(
            type=dataset_type,
            ann_file=[
Example 10
def main():
    """针对faster rcnn在voc的评估做微调
    1. args parse用直接输入替代
    2. 
    """
    config_path = './config/cfg_ssd300_vgg16_voc.py'   # note: cfg and model must match, since class counts differ across datasets
    checkpoint_path = './weights/myssd/epoch_24.pth'
    cfg = mmcv.Config.fromfile(config_path)
    out_file = 'dataset_eval_result/results.pkl'  # note: must be pkl rather than json, since outputs contains arrays that json cannot serialize
    eval_type = ['bbox']      # proposal_fast is mmdetection's own implementation
#    eval_type = ['proposal','bbox']   # these come from the coco api, which covers ['proposal','bbox','segm','keypoints']; verified working

    show_result = False   # set show=True to display each image's result in order (workable for datasets with few images)
    
    if not out_file.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
#    cfg.model.pretrained = None

#    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    dataset = get_dataset(cfg.data.test, CocoDataset)
    
    cfg.gpus = 1
    
    if cfg.gpus == 1:
#        model = build_detector(
#            cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
        model = OneStageDetector(cfg)
        
        load_checkpoint(model, checkpoint_path)
        model = MMDataParallel(model, device_ids=[0])

        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            num_gpus=1,
            dist=False,
            shuffle=False)
        outputs = single_test(model, data_loader, show=show_result)
        # outputs structure: [img1, ..., imgn], len=5000, i.e. all images of coco val
        # each img: [cls1, ..., clsn], len=80, the bbox predictions for every class
        # each cls: ndarray(n, 5), the n bboxes for that class (n > 0 if the class has predictions, else n = 0); the last column is the confidence score
        # note: the innermost ndarrays cannot be written to a json file directly; convert with data.tolist()
    else:
        model_args = cfg.model.copy()
        model_args.update(train_cfg=None, test_cfg=cfg.test_cfg)
        model_type = getattr(detectors, model_args.pop('type'))
        outputs = parallel_test(
            model_type,
            model_args,
            checkpoint_path,
            dataset,
            _data_func,
            range(cfg.gpus),
            workers_per_gpu=cfg.proc_per_gpu)
    # debug
    
    if out_file:
        print('writing results to {}'.format(out_file))
        mmcv.dump(outputs, out_file)  # dump the test results to a file first (created if it does not exist)
        eval_types = eval_type
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                result_file = out_file
#                coco_eval(result_file, eval_types, dataset.coco)  # pass result_file to coco_eval()
                """use our own evaluation()"""
                evaluation(result_file, dataset.coco, eval_types=eval_types)
            
            else:
                if not isinstance(outputs[0], dict):
                    result_file = out_file + '.json'
                    results2json(dataset, outputs, result_file)
#                    coco_eval(result_file, eval_types, dataset.coco)
                    """用自己写的evaluation()"""
                    evaluation(result_file, dataset.coco, eval_types=eval_types)
                else:
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = out_file + '.{}.json'.format(name)
                        results2json(dataset, outputs_, result_file)
                        coco_eval(result_file, eval_types, dataset.coco)
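
As the comments above note, the innermost ndarrays cannot be written to json directly. A small hypothetical helper (not part of the original code) that makes the nested outputs json-serializable:

import numpy as np

def to_json_safe(outputs):
    # recursively replace ndarray leaves with plain lists so json.dump works
    if isinstance(outputs, np.ndarray):
        return outputs.tolist()
    if isinstance(outputs, (list, tuple)):
        return [to_json_safe(x) for x in outputs]
    if isinstance(outputs, dict):
        return {k: to_json_safe(v) for k, v in outputs.items()}
    return outputs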