def __init__(self, cfg):
    super().__init__()
    self.cfg = cfg
    # build the base modules
    from utils.prepare_training import get_model
    self.backbone = get_model(cfg.backbone)
    if cfg.neck:  # cannot test "is not None" here: an unused neck is {} rather than None, but {} is still falsy
        self.neck = get_model(cfg.neck)
    if cfg.head:
        self.cls_head = get_model(cfg.head)
    # initialize weights; note that moving the weights to cpu/gpu happens later, in model.to()
    self.init_weights()
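For context, here is a minimal sketch of the nested config this __init__ consumes. The addict.Dict container, the class name Model, and the field values are assumptions; only the backbone/neck/head keys and backbone.params.pretrained appear in the examples on this page.

from addict import Dict

cfg = Dict(
    backbone=Dict(type='resnet', params=Dict(pretrained=None)),   # hypothetical sub-config
    neck=Dict(),    # empty dict is falsy, so no neck is built (see the comment above)
    head=Dict(type='cls_head', params=Dict(num_classes=10)),      # hypothetical sub-config
)
model = Model(cfg)   # Model: hypothetical name for the class owning this __init__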
Example #2
def eval_dataset_cls(cfg_path, device=None):
    """Evaluate a dataset for a classification problem.
    Equivalent to load_from + val in the runner, but usable for standalone
    dataset validation outside the runner.
    """
    # prepare the objects needed for validation
    cfg = get_config(cfg_path)
    dataset = get_dataset(cfg.valset, cfg.transform_val)
    dataloader = get_dataloader(dataset, cfg.valloader)
    model = get_model(cfg)
    if device is None:
        device = torch.device(cfg.load_device)
    # TODO: confirm the order of the following two statements
    load_checkpoint(model, cfg.load_from, device)
    model = model.to(device)
    # start validation
    buffer = {'acc': []}
    n_correct = 0
    model.eval()
    for c_iter, data_batch in enumerate(dataloader):
        with torch.no_grad():  # disable gradient tracking, forward pass only
            img = to_device(data_batch['img'], device)
            label = to_device(data_batch['gt_labels'], device)

            y_pred = model(img)
            label = torch.cat(label, dim=0)
            acc1 = accuracy(y_pred, label, topk=1)
            buffer['acc'].append(acc1)
        # accumulate correct predictions for the overall accuracy
        n_correct += buffer['acc'][-1] * len(data_batch['gt_labels'])

    vis_loss_acc(buffer, title='eval dataset')
    print('ACC on dataset: %.3f' % (n_correct / len(dataset)))
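A usage sketch for the function above; the config path is hypothetical:

import torch
eval_dataset_cls('cfg/cfg_classifier.py', device=torch.device('cuda:0'))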
Example #3
def onnx_exporter(cfg):
    """Convert a pytorch model to an onnx model.
    Requirements on the model:
    1. The model must implement a forward_dummy() method, for example:
    def forward_dummy(self, img):
        x = self.extract_feat(img)
        x = self.bbox_head(x)
        return x
    2. The final output of the model (the head output) must be a tuple/list/variable,
       not a dict, which the current pytorch.onnx export does not support.
    """
    img_shape = (1, 3) + cfg.img_size
    dummy_input = torch.randn(img_shape, device='cuda')

    # create the model from the config
    model = get_model(cfg).cuda()
    if cfg.load_from is not None:
        _ = load_checkpoint(model, cfg.load_from)
    else:
        raise ValueError('need to assign checkpoint path to load from.')

    model.forward = model.forward_dummy
    torch.onnx.export(model,
                      dummy_input,
                      cfg.work_dir + cfg.model_name + '.onnx',
                      verbose=True)
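A usage sketch for onnx_exporter; the config path is hypothetical, and the onnx checker call is an optional sanity check, not part of the original snippet:

import onnx
cfg = get_config('cfg/cfg_detector.py')   # hypothetical config path
onnx_exporter(cfg)
onnx_model = onnx.load(cfg.work_dir + cfg.model_name + '.onnx')
onnx.checker.check_model(onnx_model)      # raises if the exported graph is malformed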
Example #4
def __init__(self, cfg_path, load_from=None, load_device=None):
    self.type = 'det'  # used to tell which type of predictor this is
    # prepare the objects needed for validation
    self.cfg = get_config(cfg_path)
    # expose these two arguments here so eval does not require editing the cfg file each time
    if load_from is not None:
        self.cfg.load_from = load_from
    if load_device is not None:
        self.cfg.load_device = load_device
    self.model = get_model(self.cfg)
    self.device = torch.device(self.cfg.load_device)
    load_checkpoint(self.model, self.cfg.load_from, self.device)
    self.model = self.model.to(self.device)
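A construction sketch, assuming this __init__ belongs to a detection predictor class; the class name DetPredictor and the paths are hypothetical:

predictor = DetPredictor('cfg/cfg_detector.py',
                         load_from='work_dirs/det/latest.pth',
                         load_device='cuda:0')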
Example #5
def eval_dataset_det(cfg_path,
                     load_from=None,
                     load_device=None,
                     resume_from=None,
                     result_file=None):
    """Evaluate a dataset for a detection problem.
    A few extra keyword arguments are exposed so eval does not require editing the cfg file each time.
    """
    # prepare the objects needed for validation
    cfg = get_config(cfg_path)
    cfg.valloader.params.batch_size = 1  # force batch_size=1 during validation
    # expose these arguments here so eval does not require editing the cfg file each time
    if load_from is not None:
        cfg.load_from = load_from
    if load_device is not None:
        cfg.load_device = load_device
    if resume_from is not None:
        cfg.resume_from = resume_from

    dataset = get_dataset(cfg.valset, cfg.transform_val)
    dataloader = get_dataloader(dataset, cfg.valloader, len(cfg.gpus))

    model = get_model(cfg)
    device = torch.device(cfg.load_device)
    load_checkpoint(model, cfg.load_from, device)
    model = model.to(device)
    # no cached results yet: run prediction
    if result_file is None:
        # start validation
        model.eval()
        all_bbox_cls = []
        for c_iter, data_batch in enumerate(dataloader):
            with torch.no_grad():  # disable gradient tracking, forward pass only
                bbox_det = batch_detector(
                    model, data_batch, device,
                    return_loss=False)['bboxes']  # keep only the bboxes, shape (n_cls,)(m,5)
                # show progress
                if c_iter % 100 == 0:
                    print('%d / %d finished predict.' % (c_iter, len(dataset)))

            all_bbox_cls.append(bbox_det)  # (n_img,)(n_class,)(k,5)
        # save the predictions to a file
        filename = get_time_str() + '_eval_result.pkl'
        save2pkl(all_bbox_cls, cfg.work_dir + filename)
    # otherwise reuse an existing result file
    else:
        all_bbox_cls = loadvar(result_file)
    # evaluate
    voc_eval(all_bbox_cls, dataset, iou_thr=0.5)
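A usage sketch; the paths are hypothetical. The first call runs inference and caches the predictions to a .pkl under cfg.work_dir; a later call can pass that file as result_file to skip inference and only redo the VOC evaluation:

eval_dataset_det('cfg/cfg_detector.py',
                 load_from='work_dirs/det/epoch_12.pth',
                 load_device='cuda:0')
# second pass, reusing cached predictions (file name hypothetical):
eval_dataset_det('cfg/cfg_detector.py',
                 result_file='work_dirs/det/20xx_eval_result.pkl')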
Example #6
def __init__(self, cfg_path, load_from=None, load_device=None):
    super().__init__()
    self.type = 'cls'
    # prepare the objects needed for validation
    self.cfg = get_config(cfg_path)
    # expose these two arguments here so eval does not require editing the cfg file each time
    if load_from is not None:
        self.cfg.load_from = load_from
    if load_device is not None:
        self.cfg.load_device = load_device
    self.model = get_model(self.cfg)
    self.device = torch.device(self.cfg.load_device)
    if self.cfg.load_from is not None or self.cfg.resume_from is not None:
        load_checkpoint(self.model, self.cfg.load_from, self.device)
    self.model = self.model.to(self.device)
Example #7
def __init__(self, cfg, resume_from=None):
    # shared state: must be declared before resume/load, otherwise it would overwrite the resumed values
    self.c_epoch = 0
    self.c_iter = 0
    self.weight_ready = False
    self.buffer = {'loss': [], 'acc1': [], 'acc5': [], 'lr': []}
    # get the config
    self.cfg = cfg
    if resume_from is not None:
        self.cfg.resume_from = resume_from  # the runner can override resume_from directly, avoiding edits to the cfg file
    # check that directories and files are valid
    self.check_dir_file(self.cfg)
    # set up the logger
    self.logger = get_logger(self.cfg.logger)
    self.logger.info('start logging info.')
    # set up the device: under distributed training, different local ranks (process ids) get different devices
    self.device = get_device(self.cfg, self.logger)
    # create the batch processor
    self.batch_processor = get_batch_processor(self.cfg)
    # create the datasets
    self.trainset = get_dataset(self.cfg.trainset, self.cfg.transform)
    self.valset = get_dataset(
        self.cfg.valset, self.cfg.transform_val)  # validation transforms are basic only, no data augmentation

    #        tmp1 = self.trainset[91]      # for debug: inspect dataset __getitem__
    #        img = tmp1['img']
    #        label = tmp1['gt_labels']
    #        bbox = tmp1['gt_bboxes']
    #        ldmk = tmp1['gt_landmarks']
    #        from utils.transform import transform_inv
    #        class_names = self.trainset.CLASSES
    #        label = label - 1 # shift labels back to start from 0 so they match CLASSES
    #        transform_inv(img, bbox, label, ldmk, mean=self.cfg.transform.img_params.mean,
    #                      std=self.cfg.transform.img_params.std, class_names=class_names, show=True)

    # create the dataloaders
    self.dataloader = get_dataloader(self.trainset,
                                     self.cfg.trainloader,
                                     len(self.cfg.gpus),
                                     dist=self.cfg.distribute)
    self.valloader = get_dataloader(self.valset,
                                    self.cfg.valloader,
                                    len(self.cfg.gpus),
                                    dist=self.cfg.distribute)

    #        tmp2 = next(iter(self.dataloader))  # for debug: set workers=0 to step into collate_fn

    # create and initialize the model
    if self.cfg.load_from is not None or self.cfg.resume_from is not None:
        self.cfg.backbone.params.pretrained = None  # when loading/resuming from a checkpoint, skip the pretrained weights
    self.model = get_model(self.cfg)

    # optimizer: must be created before the model is moved to cuda
    self.optimizer = get_optimizer(self.cfg.optimizer, self.model)
    # learning-rate scheduler
    self.lr_processor = get_lr_processor(self, self.cfg.lr_processor)
    # move to GPU
    # wrap the parallel model only after the optimizer has extracted the parameters, otherwise extraction may
    # fail because the parallel wrapper adds an extra module layer around the model
    self.model = get_model_wrapper(self.model, self.cfg)
    self.model.to(self.device)
    # note: resume/load places weights directly on the target device, so it must happen after the model has
    # been moved to its device to keep the devices consistent
    # load model weights and training state, and continue training from where it stopped
    if self.cfg.resume_from:
        self.resume_training(checkpoint_path=self.cfg.resume_from,
                             map_location=self.device)  # reuse the configured device
    # load model weights only, without training state, typically for prediction
    elif self.cfg.load_from:
        load_device = torch.device(self.cfg.load_device)
        self._load_checkpoint(checkpoint_path=self.cfg.load_from,
                              map_location=load_device)
        self.weight_ready = True
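A construction sketch, assuming this __init__ belongs to the training runner class; the Runner name and the config path are hypothetical, and the actual training entry point is not shown in this snippet:

cfg = get_config('cfg/cfg_classifier.py')
runner = Runner(cfg, resume_from=None)   # or resume_from='work_dirs/cls/epoch_5.pth' to continue training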