Exemple #1
0
    def after_train_epoch(self, runner):
        """Evaluate the model after a training epoch and track the best score.

        Returns immediately unless ``self.evaluation_flag(runner)`` is truthy.
        Otherwise ``multi_gpu_test`` is called on ``runner.model``; rank 0
        then evaluates the results and, when ``self.save_best`` is set and the
        new score beats the stored one, updates ``runner.meta['hook_msgs']``
        and re-points the ``best_<key_indicator>.pth`` link in
        ``runner.work_dir`` at the last saved checkpoint.

        Args:
            runner: Object exposing ``model``, ``work_dir``, ``rank`` and
                ``meta`` as used below.
        """
        if not self.evaluation_flag(runner):
            return

        from mmdet.apis import multi_gpu_test

        # Fall back to a hidden folder in the work dir when no temporary
        # directory has been configured.
        collect_dir = self.tmpdir
        if collect_dir is None:
            collect_dir = osp.join(runner.work_dir, '.eval_hook')
        results = multi_gpu_test(
            runner.model,
            self.dataloader,
            tmpdir=collect_dir,
            gpu_collect=self.gpu_collect)

        # Only rank 0 scores the results.
        if runner.rank != 0:
            return
        print('\n')
        key_score = self.evaluate(runner, results)
        if not self.save_best:
            return

        hook_msgs = runner.meta['hook_msgs']
        previous_best = hook_msgs.get('best_score',
                                      self.init_value_map[self.rule])
        if not self.compare_func(key_score, previous_best):
            return

        # New best: record the score and link to the latest checkpoint.
        hook_msgs['best_score'] = key_score
        last_ckpt = hook_msgs['last_ckpt']
        hook_msgs['best_ckpt'] = last_ckpt
        best_link = osp.join(runner.work_dir,
                             f'best_{self.key_indicator}.pth')
        mmcv.symlink(last_ckpt, best_link)
        self.logger.info(
            f'Now best checkpoint is {last_ckpt}.'
            f'Best {self.key_indicator} is {key_score:0.4f}')
Exemple #2
0
    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='epoch_{}.pth',
                        save_optimizer=True,
                        meta=None):
        """Save checkpoints of the main model and of the adversarial model.

        The main model is written to ``filename_tmpl`` formatted with
        ``self.epoch + 1`` and linked from ``latest.pth``. The adversarial
        model uses the same template prefixed with ``adv_`` and is linked
        from ``adv_latest.pth``.

        Args:
            out_dir (str): Directory receiving the checkpoints and links.
            filename_tmpl (str): Filename template with one placeholder for
                the epoch number. Defaults to 'epoch_{}.pth'.
            save_optimizer (bool): Whether to pass the matching optimizer
                along with each model. Defaults to True.
            meta (dict, optional): Metadata stored in both checkpoints; its
                ``epoch`` and ``iter`` entries are set here (a passed dict is
                updated in place). Defaults to None.
        """
        epoch_number = self.epoch + 1
        if meta is not None:
            meta.update(epoch=epoch_number, iter=self.iter)
        else:
            meta = dict(epoch=epoch_number, iter=self.iter)

        # Main model checkpoint and its "latest" link.
        main_name = filename_tmpl.format(epoch_number)
        main_path = osp.join(out_dir, main_name)
        main_link = osp.join(out_dir, 'latest.pth')
        main_optimizer = self.optimizer if save_optimizer else None
        save_checkpoint(
            self.model, main_path, optimizer=main_optimizer, meta=meta)
        # The link target is the bare filename, i.e. a relative symlink.
        mmcv.symlink(main_name, main_link)

        # Adversarial model checkpoint: same scheme with an 'adv_' prefix.
        adv_name = ('adv_' + filename_tmpl).format(epoch_number)
        adv_path = osp.join(out_dir, adv_name)
        adv_link = osp.join(out_dir, 'adv_latest.pth')
        adv_optimizer = self.adv_optimizer if save_optimizer else None
        save_checkpoint(
            self.adv_model, adv_path, optimizer=adv_optimizer, meta=meta)
        mmcv.symlink(adv_name, adv_link)
    def after_train_epoch(self, runner):
        """Update the EMA state and periodically save an EMA checkpoint.

        ``self.update_attr(runner.model)`` runs on every call. A checkpoint
        of ``self.ema`` is written only when
        ``self.every_n_epochs(runner, self.interval)`` is truthy. Afterwards,
        if ``self.max_keep_ckpts`` is positive, older checkpoint files are
        deleted.

        Args:
            runner: Object exposing ``model``, ``work_dir``, ``meta``,
                ``epoch``, ``iter`` and ``optimizer`` as used below.
        """
        self.update_attr(runner.model)

        # Nothing to save unless this epoch hits the configured interval.
        if not self.every_n_epochs(runner, self.interval):
            return
        if not self.out_dir:
            self.out_dir = runner.work_dir

        finished_epoch = runner.epoch + 1
        meta = runner.meta
        if meta is not None:
            meta.update(epoch=finished_epoch, iter=runner.iter)
        else:
            meta = dict(epoch=finished_epoch, iter=runner.iter)

        ema_name = f'epoch_ema_{finished_epoch}.pth'
        ema_path = osp.join(self.out_dir, ema_name)
        optimizer = runner.optimizer if self.save_optimizer else None
        save_checkpoint(self.ema, ema_path, optimizer=optimizer, meta=meta)
        if self.create_symlink:
            mmcv.symlink(ema_name, osp.join(self.out_dir, 'latest_ema.pth'))

        # Prune checkpoints older than the newest ``max_keep_ckpts`` ones,
        # walking backwards and stopping at the first missing file.
        if not self.max_keep_ckpts > 0:
            return
        stale_tmpl = self.args.get('filename_tmpl', 'epoch_ema_{}.pth')
        newest_stale = finished_epoch - self.max_keep_ckpts
        for stale_epoch in range(newest_stale, 0, -1):
            stale_path = os.path.join(self.out_dir,
                                      stale_tmpl.format(stale_epoch))
            if not os.path.exists(stale_path):
                break
            os.remove(stale_path)
Exemple #4
0
 def save_checkpoint(self,
                     out_dir,
                     filename_tmpl='epoch_{}.pth',
                     save_optimizer=True,
                     meta=None,
                     create_symlink=True):
     """Save the checkpoint.

     Args:
         out_dir (str): The directory that checkpoints are saved.
         filename_tmpl (str, optional): The checkpoint filename template,
             which contains a placeholder for the epoch number.
             Defaults to 'epoch_{}.pth'.
         save_optimizer (bool, optional): Whether to save the optimizer to
             the checkpoint. Defaults to True.
         meta (dict, optional): The meta information to be saved in the
             checkpoint; a passed dict is updated in place.
             Defaults to None.
         create_symlink (bool, optional): Whether to create a symlink
             "latest.pth" to point to the latest checkpoint (a plain copy
             is made on Windows instead). Defaults to True.

     Raises:
         TypeError: If ``meta`` is neither a dict nor None.
     """
     if meta is None:
         meta = {}
     elif not isinstance(meta, dict):
         raise TypeError(
             'meta should be a dict or None, but got {}'.format(type(meta)))
     # Merge the runner-level meta BEFORE writing epoch/iter so that stale
     # ``epoch``/``iter`` entries carried in ``self.meta`` (e.g. from a
     # resumed checkpoint) cannot overwrite the current progress.
     if self.meta is not None:
         meta.update(self.meta)
     meta.update(epoch=self.epoch + 1, iter=self.iter)

     filename = filename_tmpl.format(self.epoch + 1)
     filepath = osp.join(out_dir, filename)
     optimizer = self.optimizer if save_optimizer else None
     save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
     if create_symlink:
         dst_file = osp.join(out_dir, 'latest.pth')
         if platform.system() != 'Windows':
             # Relative link: the target is the bare filename.
             mmcv.symlink(filename, dst_file)
         else:
             shutil.copy(filepath, dst_file)
def eval_best(work_dir, log_filename, metric):
    """Link the best-scoring checkpoint and delete the non-best ones.

    The log is scanned line by line. Only lines starting with ``'2021'``
    that have at least 8 space-separated fields are considered:

    * field 7 equal to ``'Saving'`` marks a checkpoint; field 10 is read as
      its iteration number;
    * field 7 starting with ``metric`` is an evaluation result of the form
      ``<name>:<value>`` and is attributed to the most recent checkpoint.

    Whenever a strictly better value is seen, ``eval_best.pth`` in
    ``work_dir`` is re-linked to ``iter_<iteration>.pth``. Afterwards every
    evaluated checkpoint is removed unless its value equals the best value or
    it belongs to the last checkpoint found in the log.

    Args:
        work_dir (str): Directory holding the ``iter_<n>.pth`` files.
        log_filename (str): Path of the training log to parse.
        metric (str): Prefix identifying the metric field.
    """
    iteration = 0
    latest_iteration = 0
    checkpoint_seen = False
    best_metric = -float('inf')
    # One (iteration, value) pair per evaluation line, in log order. The
    # iteration is None for results logged before any checkpoint, which are
    # never candidates for deletion.
    evaluations = []
    with open(log_filename) as f:
        for line in f:
            if not line.startswith('2021'):
                continue
            fields = line.strip().split(' ')
            if len(fields) < 8:
                continue
            if fields[7] == 'Saving':
                iteration = int(fields[10])
                latest_iteration = iteration
                checkpoint_seen = True
            if fields[7].startswith(metric):
                current_metric = float(fields[7].split(':')[1])
                evaluations.append(
                    (iteration if checkpoint_seen else None, current_metric))
                if current_metric > best_metric:
                    mmcv.symlink(f'iter_{iteration}.pth',
                                 osp.join(work_dir, 'eval_best.pth'))
                    best_metric = current_metric

    for ckpt_iteration, value in evaluations:
        if ckpt_iteration is None or ckpt_iteration == latest_iteration:
            continue
        if value == best_metric:
            continue
        try:
            os.remove(osp.join(work_dir, f'iter_{ckpt_iteration}.pth'))
        except FileNotFoundError:
            # Already gone: the same iteration was evaluated more than once,
            # or a previous run cleaned it up. Keeps the function re-runnable.
            pass
 def save_checkpoint(self,
                     out_dir,
                     filename_tmpl='iter_{}.pth',
                     meta=None,
                     save_optimizer=True,
                     create_symlink=True):
     """Save checkpoint to file.

     Args:
         out_dir (str): Directory to save checkpoint files.
         filename_tmpl (str, optional): Checkpoint file template.
             Defaults to 'iter_{}.pth'.
         meta (dict, optional): Metadata to be saved in checkpoint; a
             passed dict is updated in place. Defaults to None.
         save_optimizer (bool, optional): Whether save optimizer.
             Defaults to True.
         create_symlink (bool, optional): Whether create symlink to the
             latest checkpoint file (a plain copy is made on Windows
             instead). Defaults to True.

     Raises:
         TypeError: If ``meta`` is neither a dict nor None.
     """
     if meta is None:
         meta = {}
     elif not isinstance(meta, dict):
         raise TypeError(
             'meta should be a dict or None, but got {}'.format(type(meta)))
     # Merge the runner-level meta BEFORE writing iter/epoch so that stale
     # entries carried in ``self.meta`` (e.g. from a resumed checkpoint)
     # cannot overwrite the current progress.
     if self.meta is not None:
         meta.update(self.meta)
     meta.update(iter=self.iter + 1, epoch=self.epoch + 1)

     filename = filename_tmpl.format(self.iter + 1)
     filepath = osp.join(out_dir, filename)
     optimizer = self.optimizer if save_optimizer else None
     save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
     if create_symlink:
         dst_file = osp.join(out_dir, 'latest.pth')
         if platform.system() != 'Windows':
             # Relative link: the target is the bare filename.
             mmcv.symlink(filename, dst_file)
         else:
             # Copy from the full path; the bare filename would be resolved
             # against the current working directory, not ``out_dir``.
             shutil.copy(filepath, dst_file)
Exemple #7
0
    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='epoch_{}.pth',
                        save_optimizer=True,
                        meta=None,
                        create_symlink=True):
        """Write the checkpoint for the current epoch into ``out_dir``.

        Args:
            out_dir (str): Directory in which the checkpoint is stored.
            filename_tmpl (str, optional): Filename template with a single
                placeholder that receives the epoch number.
                Defaults to 'epoch_{}.pth'.
            save_optimizer (bool, optional): Store the optimizer in the
                checkpoint as well. Defaults to True.
            meta (dict, optional): Extra metadata for the checkpoint; its
                ``epoch`` and ``iter`` entries are set here (a passed dict
                is updated in place). Defaults to None.
            create_symlink (bool, optional): Also create a "latest.pth"
                symlink that points to the new checkpoint.
                Defaults to True.
        """
        epoch_number = self.epoch + 1
        if meta is not None:
            meta.update(epoch=epoch_number, iter=self.iter)
        else:
            meta = dict(epoch=epoch_number, iter=self.iter)

        ckpt_name = filename_tmpl.format(epoch_number)
        ckpt_path = osp.join(out_dir, ckpt_name)
        optimizer = None
        if save_optimizer:
            optimizer = self.optimizer
        save_checkpoint(self.model, ckpt_path, optimizer=optimizer, meta=meta)

        # `os.symlink` is not available in every environment; callers there
        # should pass `create_symlink=False`.
        if not create_symlink:
            return
        mmcv.symlink(ckpt_name, osp.join(out_dir, 'latest.pth'))
Exemple #8
0
    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='epoch_{}.pth',
                        save_optimizer=True,
                        meta=None,
                        create_symlink=True):
        """Save the checkpoint of the current epoch.

        Args:
            out_dir (str): The directory that checkpoints are saved.
            filename_tmpl (str, optional): The checkpoint filename template,
                which contains a placeholder for the epoch number.
                Defaults to 'epoch_{}.pth'.
            save_optimizer (bool, optional): Whether to save the optimizer to
                the checkpoint. Defaults to True.
            meta (dict, optional): The meta information to be saved in the
                checkpoint; a passed dict is updated in place.
                Defaults to None.
            create_symlink (bool, optional): Whether to create a symlink
                "latest.pth" to point to the latest checkpoint.
                Defaults to True.

        Raises:
            TypeError: If ``meta`` is neither a dict nor None.
        """
        if meta is None:
            meta = {}
        elif not isinstance(meta, dict):
            raise TypeError(
                f'meta should be a dict or None, but got {type(meta)}')
        # Merge the runner-level meta BEFORE writing epoch/iter so that stale
        # ``epoch``/``iter`` entries carried in ``self.meta`` (e.g. from a
        # resumed checkpoint) cannot overwrite the current progress.
        if self.meta is not None:
            meta.update(self.meta)
        meta.update(epoch=self.epoch + 1, iter=self.iter)

        filename = filename_tmpl.format(self.epoch + 1)
        filepath = osp.join(out_dir, filename)
        optimizer = self.optimizer if save_optimizer else None
        save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)

        if create_symlink:
            dst_file = osp.join(out_dir, 'latest.pth')
            # Relative link: the target is the bare filename.
            mmcv.symlink(filename, dst_file)
Exemple #9
0
def symlink_images_to_target(image_list, target_path):
    """Create a symlink in ``target_path`` for every path in ``image_list``.

    Each link is named after the basename of its source path. A summary line
    is printed once all links have been created.

    Args:
        image_list: Iterable of source image paths.
        target_path (str): Directory in which the links are created.
    """
    for src_path in image_list:
        link_path = os.path.join(target_path, os.path.basename(src_path))
        mmcv.symlink(src_path, link_path)

    print(f'images and annotations are copied to: {target_path}')
Exemple #10
0
 def save_best_checkpoint(self, runner, key_score):
     """Record ``key_score`` as the new best score if it beats the old one.

     The stored best score is read from ``runner.meta['hook_msgs']``,
     falling back to ``self.init_value_map[self.rule]``. When
     ``self.compare_func(key_score, best_score)`` is truthy, the score and
     the last checkpoint are stored back into ``hook_msgs``, the
     ``best_<key_indicator>.pth`` link in ``runner.work_dir`` is re-pointed
     at that checkpoint and a message is logged.

     Args:
         runner: Object exposing ``meta``, ``work_dir``, ``epoch`` and
             ``iter`` as used below.
         key_score: Score of the latest evaluation.
     """
     hook_msgs = runner.meta['hook_msgs']
     previous_best = hook_msgs.get('best_score',
                                   self.init_value_map[self.rule])
     if not self.compare_func(key_score, previous_best):
         return

     hook_msgs['best_score'] = key_score
     last_ckpt = hook_msgs['last_ckpt']
     hook_msgs['best_ckpt'] = last_ckpt
     best_link = osp.join(runner.work_dir, f'best_{self.key_indicator}.pth')
     mmcv.symlink(last_ckpt, best_link)

     # The logged number is the 1-based epoch or iteration, depending on
     # how the hook is configured.
     if self.by_epoch:
         time_stamp = runner.epoch + 1
     else:
         time_stamp = runner.iter + 1
     self.logger.info(f'Now best checkpoint is epoch_{time_stamp}.pth.'
                      f'Best {self.key_indicator} is {key_score:0.4f}')
Exemple #11
0
    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='epoch_{}.pth',
                        save_optimizer=True,
                        meta=None):
        """Save the checkpoint of the current epoch and link ``latest.pth``.

        Args:
            out_dir (str): Directory receiving the checkpoint and the link.
            filename_tmpl (str, optional): Filename template with one
                placeholder for the epoch number.
                Defaults to 'epoch_{}.pth'.
            save_optimizer (bool, optional): Whether to store the optimizer
                in the checkpoint. Defaults to True.
            meta (dict, optional): Metadata stored in the checkpoint; its
                ``epoch`` and ``iter`` entries are set here (a passed dict
                is updated in place). Defaults to None.
        """
        if meta is None:
            meta = dict(epoch=self.epoch + 1, iter=self.iter)
        else:
            meta.update(epoch=self.epoch + 1, iter=self.iter)

        filename = filename_tmpl.format(self.epoch + 1)
        filepath = osp.join(out_dir, filename)
        linkname = osp.join(out_dir, 'latest.pth')
        optimizer = self.optimizer if save_optimizer else None
        save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
        # Link to the bare filename. A symlink target is resolved relative to
        # the directory containing the link, so a target that repeats a
        # relative ``out_dir`` would point to a non-existent path.
        mmcv.symlink(filename, linkname)
Exemple #12
0
    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='iter_{}.pth',
                        meta=None,
                        save_optimizer=True,
                        create_symlink=True):
        """Save checkpoint to file.

        Args:
            out_dir (str): Directory to save checkpoint files.
            filename_tmpl (str, optional): Checkpoint file template.
                Defaults to 'iter_{}.pth'.
            meta (dict, optional): Metadata to be saved in checkpoint; a
                passed dict is updated in place. Defaults to None.
            save_optimizer (bool, optional): Whether save optimizer.
                Defaults to True.
            create_symlink (bool, optional): Whether create symlink to the
                latest checkpoint file (a plain copy is made on Windows
                instead). Defaults to True.

        Raises:
            TypeError: If ``meta`` is neither a dict nor None.
        """
        if meta is None:
            meta = {}
        elif not isinstance(meta, dict):
            raise TypeError(
                f'meta should be a dict or None, but got {type(meta)}')
        # Merge the runner-level meta BEFORE writing iter/epoch so that stale
        # entries carried in ``self.meta`` (e.g. from a resumed checkpoint)
        # cannot overwrite the current progress.
        if self.meta is not None:
            meta.update(self.meta)
        meta.update(iter=self.iter + 1, epoch=self.epoch + 1)

        filename = filename_tmpl.format(self.iter + 1)
        filepath = osp.join(out_dir, filename)
        optimizer = self.optimizer if save_optimizer else None
        # The loss scaler is only passed on when the fp16 grad scaler is in
        # use.
        _loss_scaler = self.loss_scaler if self.with_fp16_grad_scaler else None
        save_checkpoint(self.model,
                        filepath,
                        optimizer=optimizer,
                        loss_scaler=_loss_scaler,
                        save_apex_amp=self.use_apex_amp,
                        meta=meta)
        # in some environments, `os.symlink` is not supported, you may need to
        # set `create_symlink` to False
        if create_symlink:
            dst_file = osp.join(out_dir, 'latest.pth')
            if platform.system() != 'Windows':
                mmcv.symlink(filename, dst_file)
            else:
                shutil.copy(filepath, dst_file)
Exemple #13
0
    def save_checkpoint(
        self,
        out_dir,
        filename_tmpl="epoch_{}.pth",
        save_optimizer=True,
        meta=None,
        create_symlink=True,
    ):
        """Save the checkpoint.

        Besides the regular checkpoint file, ``self.model.save_checkpoint``
        is called with tag ``"ds"`` and the same metadata as client state.

        Args:
            out_dir (str): The directory that checkpoints are saved.
            filename_tmpl (str, optional): The checkpoint filename template,
                which contains a placeholder for the epoch number.
                Defaults to 'epoch_{}.pth'.
            save_optimizer (bool, optional): Whether to save the optimizer to
                the checkpoint. Defaults to True.
            meta (dict, optional): The meta information to be saved in the
                checkpoint; a passed dict is updated in place.
                Defaults to None.
            create_symlink (bool, optional): Whether to create a symlink
                "latest.pth" to point to the latest checkpoint (a plain copy
                is made on Windows instead). Defaults to True.

        Raises:
            TypeError: If ``meta`` is neither a dict nor None.
        """
        if meta is None:
            meta = {}
        elif not isinstance(meta, dict):
            raise TypeError(
                f"meta should be a dict or None, but got {type(meta)}")
        # Merge the runner-level meta BEFORE writing epoch/iter so that stale
        # ``epoch``/``iter`` entries carried in ``self.meta`` (e.g. from a
        # resumed checkpoint) cannot overwrite the current progress.
        if self.meta is not None:
            meta.update(self.meta)
        meta.update(epoch=self.epoch + 1, iter=self.iter)

        filename = filename_tmpl.format(self.epoch + 1)
        filepath = osp.join(out_dir, filename)
        optimizer = self.optimizer if save_optimizer else None
        save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
        self.model.save_checkpoint(out_dir, tag="ds", client_state=meta)
        # in some environments, `os.symlink` is not supported, you may need to
        # set `create_symlink` to False
        if create_symlink:
            dst_file = osp.join(out_dir, "latest.pth")
            if platform.system() != "Windows":
                mmcv.symlink(filename, dst_file)
            else:
                shutil.copy(filepath, dst_file)
Exemple #14
0
    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='epoch_{}.pth',
                        save_optimizer=True,
                        meta=None,
                        create_symlink=True):
        """Write the checkpoint for the current epoch into ``out_dir``.

        Args:
            out_dir (str): Directory in which the checkpoint is stored.
            filename_tmpl (str, optional): Filename template with a single
                placeholder that receives the epoch number.
                Defaults to 'epoch_{}.pth'.
            save_optimizer (bool, optional): Store the optimizer in the
                checkpoint as well. Defaults to True.
            meta (dict, optional): Extra metadata for the checkpoint; a
                passed dict is updated in place. Defaults to None.
            create_symlink (bool, optional): Also make "latest.pth" refer to
                the new checkpoint (symlink, or a copy on Windows).
                Defaults to True.

        Raises:
            TypeError: If ``meta`` is neither a dict nor None.
        """
        if meta is not None and not isinstance(meta, dict):
            raise TypeError(
                f'meta should be a dict or None, but got {type(meta)}')
        ckpt_meta = {} if meta is None else meta

        # Order matters: the runner-level meta is merged first and the
        # current epoch/iter are written last, otherwise resumed checkpoints
        # end up with wrong values.
        # More details in https://github.com/open-mmlab/mmcv/pull/1108
        runner_meta = self.meta
        if runner_meta is not None:
            ckpt_meta.update(runner_meta)
        epoch_number = self.epoch + 1
        ckpt_meta.update(epoch=epoch_number, iter=self.iter)

        ckpt_name = filename_tmpl.format(epoch_number)
        ckpt_path = osp.join(out_dir, ckpt_name)
        optimizer = None
        if save_optimizer:
            optimizer = self.optimizer
        save_checkpoint(
            self.model, ckpt_path, optimizer=optimizer, meta=ckpt_meta)

        # `os.symlink` is not available in every environment; callers there
        # should pass `create_symlink=False`.
        if not create_symlink:
            return
        latest_path = osp.join(out_dir, 'latest.pth')
        if platform.system() == 'Windows':
            shutil.copy(ckpt_path, latest_path)
        else:
            mmcv.symlink(ckpt_name, latest_path)
Exemple #15
0
    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='epoch_{}.pth',
                        save_optimizer=True,
                        meta=None,
                        create_symlink=True):
        """Write the checkpoint for the current epoch into ``out_dir``.

        Args:
            out_dir (str): Directory in which the checkpoint is stored.
            filename_tmpl (str, optional): Filename template with a single
                placeholder that receives the epoch number.
                Defaults to 'epoch_{}.pth'.
            save_optimizer (bool, optional): Store the optimizer in the
                checkpoint as well. Defaults to True.
            meta (dict, optional): Extra metadata for the checkpoint; its
                ``epoch`` and ``iter`` entries are set here (a passed dict
                is updated in place). Defaults to None.
            create_symlink (bool, optional): Also create a "latest.pth"
                symlink that points to the new checkpoint.
                Defaults to True.
        """
        epoch_number = self.epoch + 1
        if meta is not None:
            meta.update(epoch=epoch_number, iter=self.iter)
        else:
            meta = dict(epoch=epoch_number, iter=self.iter)

        ckpt_name = filename_tmpl.format(epoch_number)
        ckpt_path = osp.join(out_dir, ckpt_name)
        optimizer = None
        if save_optimizer:
            optimizer = self.optimizer
        save_checkpoint(self.model, ckpt_path, optimizer=optimizer, meta=meta)

        # `os.symlink` is not available in every environment; callers there
        # should pass `create_symlink=False`.
        if not create_symlink:
            return
        mmcv.symlink(ckpt_name, osp.join(out_dir, 'latest.pth'))
Exemple #16
0
    def after_train_epoch(self, runner):
        """Broadcast BatchNorm buffers, then evaluate and track the best score.

        When ``self.broadcast_bn_buffer`` is set, the running statistics of
        every tracked BatchNorm module are broadcast from rank 0. Evaluation
        only happens if ``self.evaluation_flag(runner)`` is truthy; rank 0
        then scores the results and, when ``self.save_best`` is set and the
        new score beats the stored one, updates ``runner.meta['hook_msgs']``
        and re-points the ``best_<key_indicator>.pth`` link.

        Args:
            runner: Object exposing ``model``, ``work_dir``, ``rank`` and
                ``meta`` as used below.
        """
        # DDP in pytorch does not synchronize BatchNorm's buffers
        # (running_mean and running_var), which may lead to inconsistent
        # model performance across ranks; broadcasting rank 0's buffers to
        # the other ranks avoids this.
        if self.broadcast_bn_buffer:
            for _, module in runner.model.named_modules():
                tracked_bn = (isinstance(module, _BatchNorm)
                              and module.track_running_stats)
                if not tracked_bn:
                    continue
                dist.broadcast(module.running_var, 0)
                dist.broadcast(module.running_mean, 0)

        if not self.evaluation_flag(runner):
            return

        from mmdet.apis import multi_gpu_test

        # Fall back to a hidden folder in the work dir when no temporary
        # directory has been configured.
        collect_dir = self.tmpdir
        if collect_dir is None:
            collect_dir = osp.join(runner.work_dir, '.eval_hook')
        results = multi_gpu_test(
            runner.model,
            self.dataloader,
            tmpdir=collect_dir,
            gpu_collect=self.gpu_collect)

        # Only rank 0 scores the results.
        if runner.rank != 0:
            return
        print('\n')
        key_score = self.evaluate(runner, results)
        if not self.save_best:
            return

        hook_msgs = runner.meta['hook_msgs']
        previous_best = hook_msgs.get('best_score',
                                      self.init_value_map[self.rule])
        if not self.compare_func(key_score, previous_best):
            return

        # New best: record the score and link to the latest checkpoint.
        hook_msgs['best_score'] = key_score
        last_ckpt = hook_msgs['last_ckpt']
        hook_msgs['best_ckpt'] = last_ckpt
        best_link = osp.join(runner.work_dir,
                             f'best_{self.key_indicator}.pth')
        mmcv.symlink(last_ckpt, best_link)
        self.logger.info(
            f'Now best checkpoint is {last_ckpt}.'
            f'Best {self.key_indicator} is {key_score:0.4f}')
Exemple #17
0
 def after_train_epoch(self, runner):
     """Evaluate on a single GPU after an epoch and track the best score.

     Returns immediately unless ``self.evaluation_flag(runner)`` is truthy.
     Otherwise the results of ``single_gpu_test`` are evaluated and, when
     ``self.save_best`` is set and the new score beats the stored one,
     ``runner.meta['hook_msgs']`` is updated and the
     ``best_<key_indicator>.pth`` link in ``runner.work_dir`` is re-pointed
     at the last saved checkpoint.

     Args:
         runner: Object exposing ``model``, ``work_dir``, ``epoch`` and
             ``meta`` as used below.
     """
     if not self.evaluation_flag(runner):
         return

     from mmdet.apis import single_gpu_test

     results = single_gpu_test(runner.model, self.dataloader, show=False)
     key_score = self.evaluate(runner, results)
     if not self.save_best:
         return

     hook_msgs = runner.meta['hook_msgs']
     previous_best = hook_msgs.get('best_score',
                                   self.init_value_map[self.rule])
     if not self.compare_func(key_score, previous_best):
         return

     # New best: record the score and link to the latest checkpoint.
     hook_msgs['best_score'] = key_score
     last_ckpt = hook_msgs['last_ckpt']
     hook_msgs['best_ckpt'] = last_ckpt
     best_link = osp.join(runner.work_dir, f'best_{self.key_indicator}.pth')
     mmcv.symlink(last_ckpt, best_link)
     self.logger.info(
         f'Now best checkpoint is epoch_{runner.epoch + 1}.pth.'
         f'Best {self.key_indicator} is {key_score:0.4f}')
Exemple #18
0
    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='epoch_{}.pth',
                        save_optimizer=True,
                        meta=None,
                        offset=1,
                        iter_no_offset=False):
        """Save the checkpoint together with training-progress metadata.

        Args:
            out_dir (str): Directory receiving the checkpoint and the
                ``latest.pth`` link.
            filename_tmpl (str, optional): Filename template with one
                placeholder for the epoch number.
                Defaults to 'epoch_{}.pth'.
            save_optimizer (bool, optional): Whether to store the optimizer
                in the checkpoint. Defaults to True.
            meta (dict, optional): Metadata stored in the checkpoint; the
                progress entries below are written into it (a passed dict is
                updated in place). A new dict is created when omitted.
                Defaults to None.
            offset (int, optional): Value added to ``self.epoch`` for the
                stored epoch and the filename. Defaults to 1.
            iter_no_offset (bool, optional): If True, ``iter`` and
                ``inner_iter`` are stored without adding ``offset``.
                Defaults to False.
        """
        # Build a fresh dict per call: a mutable default argument would be
        # shared (and mutated) across every call and every instance.
        if meta is None:
            meta = {}
        global_step = get_global_step()
        # when we save checkpoint after the epoch finished, the iter has
        # already plus 1.
        iter_offset = 0 if iter_no_offset else offset
        meta.update(epoch=self.epoch + offset,
                    iter=self.iter + iter_offset,
                    inner_iter=self.inner_iter + iter_offset,
                    global_step=global_step,
                    batchsize=self.batchsize,
                    initial_lr=self.initial_lr)

        filename = osp.join(out_dir, filename_tmpl.format(self.epoch + offset))
        # Relative target ("./<name>") for the latest.pth link.
        local_filename = "./{}".format(
            filename_tmpl.format(self.epoch + offset))
        optimizer = self.optimizer if save_optimizer else None
        save_checkpoint(self.model, filename, optimizer=optimizer, meta=meta)
        mmcv.symlink(local_filename, osp.join(out_dir, 'latest.pth'))
Exemple #19
0
    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='epoch_{}.pth',
                        save_optimizer=True,
                        meta=None):
        """Save the checkpoint; a failing ``latest.pth`` symlink is tolerated.

        The symlink step is best-effort so that file systems without symlink
        support (e.g. on Windows) do not abort checkpoint saving. When
        ``self.ema_model`` is set, it is saved next to the main checkpoint
        with ``-ema.pth`` appended to the checkpoint path.

        Args:
            out_dir (str): Directory receiving the checkpoint and the link.
            filename_tmpl (str, optional): Filename template with one
                placeholder for the epoch number.
                Defaults to 'epoch_{}.pth'.
            save_optimizer (bool, optional): Whether to store the optimizer
                in the checkpoint. Defaults to True.
            meta (dict, optional): Metadata stored in the checkpoint; its
                ``epoch`` and ``iter`` entries are set here (a passed dict
                is updated in place). Defaults to None.
        """
        if meta is None:
            meta = dict(epoch=self.epoch + 1, iter=self.iter)
        else:
            meta.update(epoch=self.epoch + 1, iter=self.iter)

        filename = filename_tmpl.format(self.epoch + 1)
        filepath = osp.join(out_dir, filename)
        linkpath = osp.join(out_dir, 'latest.pth')
        optimizer = self.optimizer if save_optimizer else None
        save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
        if self.ema_model is not None:
            save_checkpoint(self.ema_model, f'{filepath}-ema.pth')
        # use relative symlink
        try:
            mmcv.symlink(filename, linkpath)
        except Exception:
            # Best effort only. ``Exception`` rather than a bare ``except``
            # so KeyboardInterrupt/SystemExit still propagate.
            print('Failed to symlink from {} to {}.'.format(
                filename, linkpath))