def after_train_epoch(self, runner):
    """Run distributed evaluation after a training epoch.

    Returns immediately unless ``self.evaluation_flag(runner)`` is truthy.
    Otherwise every rank runs ``multi_gpu_test``; rank 0 then calls
    ``self.evaluate`` and, when ``self.save_best`` is set and the new score
    wins ``self.compare_func``, records it under
    ``runner.meta['hook_msgs']`` and links ``best_<key_indicator>.pth`` in
    ``runner.work_dir`` to the last saved checkpoint.

    Args:
        runner: Object driving training; ``model``, ``work_dir``, ``rank``
            and ``meta`` are read from it.
    """
    if not self.evaluation_flag(runner):
        return

    from mmdet.apis import multi_gpu_test

    # Fall back to a hidden folder in the work dir for result collection.
    collect_dir = self.tmpdir
    if collect_dir is None:
        collect_dir = osp.join(runner.work_dir, '.eval_hook')
    results = multi_gpu_test(
        runner.model,
        self.dataloader,
        tmpdir=collect_dir,
        gpu_collect=self.gpu_collect)

    # Only rank 0 evaluates and does the best-checkpoint bookkeeping.
    if runner.rank != 0:
        return
    print('\n')
    key_score = self.evaluate(runner, results)
    if not self.save_best:
        return

    hook_msgs = runner.meta['hook_msgs']
    best_score = hook_msgs.get('best_score', self.init_value_map[self.rule])
    if not self.compare_func(key_score, best_score):
        return

    best_score = key_score
    hook_msgs['best_score'] = best_score
    last_ckpt = hook_msgs['last_ckpt']
    hook_msgs['best_ckpt'] = last_ckpt
    best_link = osp.join(runner.work_dir, f'best_{self.key_indicator}.pth')
    mmcv.symlink(last_ckpt, best_link)
    self.logger.info(
        f'Now best checkpoint is {last_ckpt}.'
        f'Best {self.key_indicator} is {best_score:0.4f}')
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='epoch_{}.pth',
                    save_optimizer=True,
                    meta=None):
    """Save the main model and the adversarial model as two checkpoints.

    The main model is written with ``filename_tmpl`` and linked from
    ``latest.pth``; the adversarial model is written with the same template
    prefixed by ``adv_`` and linked from ``adv_latest.pth``. Both
    checkpoints share the same ``meta`` dict.

    Args:
        out_dir (str): Directory receiving the checkpoint files and links.
        filename_tmpl (str): Template with one placeholder for the epoch.
        save_optimizer (bool): Whether the matching optimizer is stored.
        meta (dict, optional): Extra metadata; updated in place with the
            current ``epoch`` and ``iter``.
    """
    progress = dict(epoch=self.epoch + 1, iter=self.iter)
    if meta is None:
        meta = progress
    else:
        meta.update(**progress)

    # (template prefix, link name, model attribute, optimizer attribute)
    targets = (
        ('', 'latest.pth', 'model', 'optimizer'),
        ('adv_', 'adv_latest.pth', 'adv_model', 'adv_optimizer'),
    )
    for prefix, link_name, model_attr, optim_attr in targets:
        filename_tmpl = prefix + filename_tmpl
        filename = filename_tmpl.format(self.epoch + 1)
        filepath = osp.join(out_dir, filename)
        linkpath = osp.join(out_dir, link_name)
        optimizer = getattr(self, optim_attr) if save_optimizer else None
        save_checkpoint(
            getattr(self, model_attr),
            filepath,
            optimizer=optimizer,
            meta=meta)
        # use relative symlink
        mmcv.symlink(filename, linkpath)
def after_train_epoch(self, runner):
    """Update the EMA attributes and periodically save the EMA model.

    ``self.update_attr`` is called on every epoch. Every ``self.interval``
    epochs (as decided by ``self.every_n_epochs``) ``self.ema`` is written
    to ``epoch_ema_<N>.pth`` inside ``self.out_dir`` (defaulting to
    ``runner.work_dir``), optionally linked from ``latest_ema.pth``, and
    older checkpoints beyond ``self.max_keep_ckpts`` are deleted.

    Args:
        runner: Object driving training; ``model``, ``meta``, ``epoch``,
            ``iter``, ``optimizer`` and ``work_dir`` are read from it.
    """
    self.update_attr(runner.model)

    # save ema model
    if not self.every_n_epochs(runner, self.interval):
        return
    if not self.out_dir:
        self.out_dir = runner.work_dir

    current_epoch = runner.epoch + 1
    # Work on a copy: updating ``runner.meta`` itself would leave stale
    # ``epoch``/``iter`` entries in the runner's shared metadata, which
    # other checkpoint writers then pick up.
    meta = dict(runner.meta) if runner.meta is not None else {}
    meta.update(epoch=current_epoch, iter=runner.iter)

    filename = 'epoch_ema_{}.pth'.format(current_epoch)
    filepath = osp.join(self.out_dir, filename)
    optimizer = runner.optimizer if self.save_optimizer else None
    save_checkpoint(self.ema, filepath, optimizer=optimizer, meta=meta)
    if self.create_symlink:
        mmcv.symlink(filename, osp.join(self.out_dir, 'latest_ema.pth'))

    # remove other checkpoints
    if self.max_keep_ckpts > 0:
        # NOTE(review): the cleanup template can be overridden through
        # ``self.args`` while the save above always uses
        # 'epoch_ema_{}.pth' -- confirm the two are meant to differ.
        filename_tmpl = self.args.get('filename_tmpl', 'epoch_ema_{}.pth')
        # Walk backwards from the newest out-of-window epoch and stop at
        # the first gap.
        for epoch in range(current_epoch - self.max_keep_ckpts, 0, -1):
            ckpt_path = os.path.join(self.out_dir,
                                     filename_tmpl.format(epoch))
            if os.path.exists(ckpt_path):
                os.remove(ckpt_path)
            else:
                break
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='epoch_{}.pth',
                    save_optimizer=True,
                    meta=None,
                    create_symlink=True):
    """Save the checkpoint.

    Args:
        out_dir (str): The directory that checkpoints are saved.
        filename_tmpl (str, optional): The checkpoint filename template,
            which contains a placeholder for the epoch number.
            Defaults to 'epoch_{}.pth'.
        save_optimizer (bool, optional): Whether to save the optimizer to
            the checkpoint. Defaults to True.
        meta (dict, optional): The meta information to be saved in the
            checkpoint. Updated in place when given. Defaults to None.
        create_symlink (bool, optional): Whether to create a symlink
            "latest.pth" to point to the latest checkpoint (a copy is made
            instead on Windows). Defaults to True.

    Raises:
        TypeError: If ``meta`` is neither a dict nor None.
    """
    if meta is None:
        meta = {}
    elif not isinstance(meta, dict):
        raise TypeError(
            f'meta should be a dict or None, but got {type(meta)}')
    if self.meta is not None:
        meta.update(self.meta)
    # The current counters are written last so that stale ``epoch``/``iter``
    # entries carried in ``self.meta`` cannot overwrite them.
    meta.update(epoch=self.epoch + 1, iter=self.iter)

    filename = filename_tmpl.format(self.epoch + 1)
    filepath = osp.join(out_dir, filename)
    optimizer = self.optimizer if save_optimizer else None
    save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
    if create_symlink:
        dst_file = osp.join(out_dir, 'latest.pth')
        if platform.system() != 'Windows':
            # Relative link: ``filename`` is resolved next to ``dst_file``.
            mmcv.symlink(filename, dst_file)
        else:
            shutil.copy(filepath, dst_file)
def _parse_eval_log(log_filename, metric, line_prefix):
    """Collect ``(iteration, value)`` pairs for ``metric`` from a log file.

    ``iteration`` is the number announced by the closest preceding
    ``Saving`` line, or ``None`` when the metric was logged before any
    ``Saving`` line.

    Returns:
        tuple: ``(records, latest_iteration)``; ``latest_iteration`` is
        ``None`` when the log holds no ``Saving`` line.
    """
    records = []
    iteration = None
    with open(log_filename) as f:
        for line in f:
            if not line.startswith(line_prefix):
                continue
            fields = line.strip().split(' ')
            if len(fields) < 8:
                continue
            if fields[7] == 'Saving':
                iteration = int(fields[10])
            if fields[7].startswith(metric):
                value = float(fields[7].split(':')[1])
                records.append((iteration, value))
    return records, iteration


def eval_best(work_dir, log_filename, metric, line_prefix='2021'):
    """Link the best-scoring checkpoint and delete the non-best ones.

    The log is scanned for lines starting with ``line_prefix`` whose eighth
    space-separated field is either ``Saving`` (the eleventh field is then
    read as the iteration number) or ``<metric>:<value>``. The checkpoint
    of the highest value is linked as ``eval_best.pth`` in ``work_dir``.
    Every other evaluated checkpoint except the most recently saved one is
    removed.

    Args:
        work_dir (str): Directory holding the ``iter_<N>.pth`` files.
        log_filename (str): Path of the text log to parse.
        metric (str): Prefix identifying the metric field.
        line_prefix (str): Only lines starting with this text are parsed.
            Defaults to '2021', the value that used to be hard-coded.
    """
    records, latest_iteration = _parse_eval_log(log_filename, metric,
                                                line_prefix)

    best_metric = -float('inf')
    best_iteration = None
    improved = False
    for iteration, value in records:
        if value > best_metric:
            best_metric = value
            best_iteration = iteration
            improved = True
    if improved:
        # A metric logged before any save keeps the historical target
        # ``iter_0.pth``.
        mmcv.symlink(f'iter_{best_iteration or 0}.pth',
                     osp.join(work_dir, 'eval_best.pth'))

    for iteration, value in records:
        # Scores without a saved checkpoint and the latest checkpoint are
        # never deleted.
        if iteration is None or iteration == latest_iteration:
            continue
        if value != best_metric:
            try:
                os.remove(osp.join(work_dir, f'iter_{iteration}.pth'))
            except FileNotFoundError:
                # Already removed (e.g. by an earlier run or a duplicate
                # log entry); nothing left to clean up.
                pass
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='iter_{}.pth',
                    meta=None,
                    save_optimizer=True,
                    create_symlink=True):
    """Save checkpoint to file.

    Args:
        out_dir (str): Directory to save checkpoint files.
        filename_tmpl (str, optional): Checkpoint file template.
            Defaults to 'iter_{}.pth'.
        meta (dict, optional): Metadata to be saved in checkpoint.
            Updated in place when given. Defaults to None.
        save_optimizer (bool, optional): Whether save optimizer.
            Defaults to True.
        create_symlink (bool, optional): Whether create symlink to the
            latest checkpoint file (a copy is made instead on Windows).
            Defaults to True.

    Raises:
        TypeError: If ``meta`` is neither a dict nor None.
    """
    if meta is None:
        meta = {}
    elif not isinstance(meta, dict):
        raise TypeError(
            f'meta should be a dict or None, but got {type(meta)}')
    if self.meta is not None:
        meta.update(self.meta)
    # The current counters are written last so that stale ``iter``/``epoch``
    # entries carried in ``self.meta`` cannot overwrite them.
    meta.update(iter=self.iter + 1, epoch=self.epoch + 1)

    filename = filename_tmpl.format(self.iter + 1)
    filepath = osp.join(out_dir, filename)
    optimizer = self.optimizer if save_optimizer else None
    save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
    if create_symlink:
        dst_file = osp.join(out_dir, 'latest.pth')
        if platform.system() != 'Windows':
            # Relative link: ``filename`` is resolved next to ``dst_file``.
            mmcv.symlink(filename, dst_file)
        else:
            # Copy from the full path; the bare file name would be looked
            # up in the current working directory, not in ``out_dir``.
            shutil.copy(filepath, dst_file)
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='epoch_{}.pth',
                    save_optimizer=True,
                    meta=None,
                    create_symlink=True):
    """Write the model checkpoint for the epoch that just finished.

    Args:
        out_dir (str): Directory receiving the checkpoint.
        filename_tmpl (str, optional): File name template with one
            placeholder for the epoch number. Defaults to 'epoch_{}.pth'.
        save_optimizer (bool, optional): Whether the optimizer is stored
            in the checkpoint. Defaults to True.
        meta (dict, optional): Metadata stored in the checkpoint; updated
            in place with ``epoch`` and ``iter``. Defaults to None.
        create_symlink (bool, optional): Whether "latest.pth" is linked to
            the new checkpoint. Defaults to True.
    """
    progress = dict(epoch=self.epoch + 1, iter=self.iter)
    if meta is None:
        meta = progress
    else:
        meta.update(**progress)

    ckpt_name = filename_tmpl.format(self.epoch + 1)
    save_checkpoint(
        self.model,
        osp.join(out_dir, ckpt_name),
        optimizer=self.optimizer if save_optimizer else None,
        meta=meta)

    # in some environments, `os.symlink` is not supported, you may need to
    # set `create_symlink` to False
    if not create_symlink:
        return
    mmcv.symlink(ckpt_name, osp.join(out_dir, 'latest.pth'))
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='epoch_{}.pth',
                    save_optimizer=True,
                    meta=None,
                    create_symlink=True):
    """Save the checkpoint of the current epoch.

    Args:
        out_dir (str): The directory that checkpoints are saved.
        filename_tmpl (str, optional): The checkpoint filename template,
            which contains a placeholder for the epoch number.
            Defaults to 'epoch_{}.pth'.
        save_optimizer (bool, optional): Whether to save the optimizer to
            the checkpoint. Defaults to True.
        meta (dict, optional): The meta information to be saved in the
            checkpoint. Updated in place when given. Defaults to None.
        create_symlink (bool, optional): Whether to create a symlink
            "latest.pth" to point to the latest checkpoint.
            Defaults to True.

    Raises:
        TypeError: If ``meta`` is neither a dict nor None.
    """
    if meta is None:
        meta = {}
    elif not isinstance(meta, dict):
        raise TypeError(
            f'meta should be a dict or None, but got {type(meta)}')
    if self.meta is not None:
        meta.update(self.meta)
    # The current counters are written last so that stale ``epoch``/``iter``
    # entries carried in ``self.meta`` cannot overwrite them.
    meta.update(epoch=self.epoch + 1, iter=self.iter)

    filename = filename_tmpl.format(self.epoch + 1)
    filepath = osp.join(out_dir, filename)
    optimizer = self.optimizer if save_optimizer else None
    save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
    if create_symlink:
        dst_file = osp.join(out_dir, 'latest.pth')
        # Relative link: ``filename`` is resolved next to ``dst_file``.
        mmcv.symlink(filename, dst_file)
def symlink_images_to_target(image_list, target_path):
    """Link every image in ``image_list`` into ``target_path``.

    Each link keeps the base name of its source file. A summary line is
    printed once all links have been created.

    Args:
        image_list: Iterable of source image paths.
        target_path (str): Directory in which the links are created.
    """
    link_pairs = (
        (src, os.path.join(target_path, os.path.basename(src)))
        for src in image_list)
    for src, dst in link_pairs:
        mmcv.symlink(src, dst)
    print('images and annotations are copied to: {}'.format(target_path))
def save_best_checkpoint(self, runner, key_score):
    """Record ``key_score`` as the best score if it beats the stored one.

    The stored best score lives in ``runner.meta['hook_msgs']`` and falls
    back to ``self.init_value_map[self.rule]``. On improvement the best
    score and checkpoint entries are updated, ``best_<key_indicator>.pth``
    in ``runner.work_dir`` is linked to the last checkpoint, and a message
    is logged.

    Args:
        runner: Object driving training; ``meta``, ``work_dir``, ``epoch``
            and ``iter`` are read from it.
        key_score: Score of the latest evaluation.
    """
    hook_msgs = runner.meta['hook_msgs']
    previous_best = hook_msgs.get('best_score',
                                  self.init_value_map[self.rule])
    if not self.compare_func(key_score, previous_best):
        return

    best_score = key_score
    hook_msgs['best_score'] = best_score
    last_ckpt = hook_msgs['last_ckpt']
    hook_msgs['best_ckpt'] = last_ckpt
    best_link = osp.join(runner.work_dir, f'best_{self.key_indicator}.pth')
    mmcv.symlink(last_ckpt, best_link)

    if self.by_epoch:
        time_stamp = runner.epoch + 1
    else:
        time_stamp = runner.iter + 1
    self.logger.info(f'Now best checkpoint is epoch_{time_stamp}.pth.'
                     f'Best {self.key_indicator} is {best_score:0.4f}')
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='epoch_{}.pth',
                    save_optimizer=True,
                    meta=None):
    """Save the checkpoint of the current epoch and refresh ``latest.pth``.

    Args:
        out_dir (str): Directory receiving the checkpoint and the link.
        filename_tmpl (str): Template with one placeholder for the epoch.
        save_optimizer (bool): Whether the optimizer is stored as well.
        meta (dict, optional): Extra metadata; updated in place with the
            current ``epoch`` and ``iter``.
    """
    if meta is None:
        meta = dict(epoch=self.epoch + 1, iter=self.iter)
    else:
        meta.update(epoch=self.epoch + 1, iter=self.iter)

    filename = filename_tmpl.format(self.epoch + 1)
    filepath = osp.join(out_dir, filename)
    linkname = osp.join(out_dir, 'latest.pth')
    optimizer = self.optimizer if save_optimizer else None
    save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
    # Link to the bare file name: a symlink target is resolved relative to
    # the directory containing the link, so using the ``out_dir``-joined
    # path leaves a dangling link whenever ``out_dir`` is relative.
    mmcv.symlink(filename, linkname)
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='iter_{}.pth',
                    meta=None,
                    save_optimizer=True,
                    create_symlink=True):
    """Save checkpoint to file.

    Args:
        out_dir (str): Directory to save checkpoint files.
        filename_tmpl (str, optional): Checkpoint file template.
            Defaults to 'iter_{}.pth'.
        meta (dict, optional): Metadata to be saved in checkpoint.
            Updated in place when given. Defaults to None.
        save_optimizer (bool, optional): Whether save optimizer.
            Defaults to True.
        create_symlink (bool, optional): Whether create symlink to the
            latest checkpoint file (a copy is made instead on Windows).
            Defaults to True.

    Raises:
        TypeError: If ``meta`` is neither a dict nor None.
    """
    if meta is None:
        meta = {}
    elif not isinstance(meta, dict):
        raise TypeError(
            f'meta should be a dict or None, but got {type(meta)}')
    if self.meta is not None:
        meta.update(self.meta)
    # The current counters are written last so that stale ``iter``/``epoch``
    # entries carried in ``self.meta`` cannot overwrite them.
    meta.update(iter=self.iter + 1, epoch=self.epoch + 1)

    filename = filename_tmpl.format(self.iter + 1)
    filepath = osp.join(out_dir, filename)
    optimizer = self.optimizer if save_optimizer else None
    # The loss scaler is only stored when the fp16 grad scaler is in use.
    _loss_scaler = self.loss_scaler if self.with_fp16_grad_scaler else None
    save_checkpoint(
        self.model,
        filepath,
        optimizer=optimizer,
        loss_scaler=_loss_scaler,
        save_apex_amp=self.use_apex_amp,
        meta=meta)
    # in some environments, `os.symlink` is not supported, you may need to
    # set `create_symlink` to False
    if create_symlink:
        dst_file = osp.join(out_dir, 'latest.pth')
        if platform.system() != 'Windows':
            mmcv.symlink(filename, dst_file)
        else:
            shutil.copy(filepath, dst_file)
def save_checkpoint(
    self,
    out_dir,
    filename_tmpl="epoch_{}.pth",
    save_optimizer=True,
    meta=None,
    create_symlink=True,
):
    """Save the checkpoint.

    Besides the regular checkpoint file, ``self.model.save_checkpoint`` is
    called with tag "ds" and the same metadata as client state.

    Args:
        out_dir (str): The directory that checkpoints are saved.
        filename_tmpl (str, optional): The checkpoint filename template,
            which contains a placeholder for the epoch number.
            Defaults to 'epoch_{}.pth'.
        save_optimizer (bool, optional): Whether to save the optimizer to
            the checkpoint. Defaults to True.
        meta (dict, optional): The meta information to be saved in the
            checkpoint. Updated in place when given. Defaults to None.
        create_symlink (bool, optional): Whether to create a symlink
            "latest.pth" to point to the latest checkpoint (a copy is made
            instead on Windows). Defaults to True.

    Raises:
        TypeError: If ``meta`` is neither a dict nor None.
    """
    if meta is None:
        meta = {}
    elif not isinstance(meta, dict):
        raise TypeError(
            f"meta should be a dict or None, but got {type(meta)}")
    if self.meta is not None:
        meta.update(self.meta)
    # The current counters are written last so that stale ``epoch``/``iter``
    # entries carried in ``self.meta`` cannot overwrite them.
    meta.update(epoch=self.epoch + 1, iter=self.iter)

    filename = filename_tmpl.format(self.epoch + 1)
    filepath = osp.join(out_dir, filename)
    optimizer = self.optimizer if save_optimizer else None
    save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
    self.model.save_checkpoint(out_dir, tag="ds", client_state=meta)
    # in some environments, `os.symlink` is not supported, you may need to
    # set `create_symlink` to False
    if create_symlink:
        dst_file = osp.join(out_dir, "latest.pth")
        if platform.system() != "Windows":
            mmcv.symlink(filename, dst_file)
        else:
            shutil.copy(filepath, dst_file)
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='epoch_{}.pth',
                    save_optimizer=True,
                    meta=None,
                    create_symlink=True):
    """Write the checkpoint of the current epoch.

    Args:
        out_dir (str): Directory receiving the checkpoint.
        filename_tmpl (str, optional): File name template with one
            placeholder for the epoch number. Defaults to 'epoch_{}.pth'.
        save_optimizer (bool, optional): Whether the optimizer is stored
            in the checkpoint. Defaults to True.
        meta (dict, optional): Metadata stored in the checkpoint; updated
            in place when given. Defaults to None.
        create_symlink (bool, optional): Whether "latest.pth" is made to
            point to the new checkpoint (copied on Windows).
            Defaults to True.

    Raises:
        TypeError: If ``meta`` is neither a dict nor None.
    """
    if meta is not None and not isinstance(meta, dict):
        raise TypeError(
            f'meta should be a dict or None, but got {type(meta)}')
    if meta is None:
        meta = {}

    # Note: the runner-level meta must be merged before the current
    # epoch/iter are written, otherwise there will be problems with
    # resumed checkpoints.
    # More details in https://github.com/open-mmlab/mmcv/pull/1108
    if self.meta is not None:
        meta.update(self.meta)
    meta.update(epoch=self.epoch + 1, iter=self.iter)

    ckpt_name = filename_tmpl.format(self.epoch + 1)
    ckpt_path = osp.join(out_dir, ckpt_name)
    save_checkpoint(
        self.model,
        ckpt_path,
        optimizer=self.optimizer if save_optimizer else None,
        meta=meta)

    # in some environments, `os.symlink` is not supported, you may need to
    # set `create_symlink` to False
    if not create_symlink:
        return
    latest_path = osp.join(out_dir, 'latest.pth')
    if platform.system() == 'Windows':
        shutil.copy(ckpt_path, latest_path)
    else:
        mmcv.symlink(ckpt_name, latest_path)
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='epoch_{}.pth',
                    save_optimizer=True,
                    meta=None,
                    create_symlink=True):
    """Save the model checkpoint for the current epoch.

    Args:
        out_dir (str): Directory receiving the checkpoint.
        filename_tmpl (str): Template with one placeholder for the epoch.
        save_optimizer (bool): Whether the optimizer is stored as well.
        meta (dict, optional): Extra metadata; updated in place with the
            current ``epoch`` and ``iter``.
        create_symlink (bool): Whether ``latest.pth`` is linked to the new
            checkpoint.
    """
    epoch_number = self.epoch + 1
    if meta is not None:
        meta.update(epoch=epoch_number, iter=self.iter)
    else:
        meta = dict(epoch=epoch_number, iter=self.iter)

    filename = filename_tmpl.format(epoch_number)
    target = osp.join(out_dir, filename)
    optimizer = None
    if save_optimizer:
        optimizer = self.optimizer
    save_checkpoint(self.model, target, optimizer=optimizer, meta=meta)

    # in some environments, `os.symlink` is not supported, you may need to
    # set `create_symlink` to False
    if create_symlink:
        latest = osp.join(out_dir, 'latest.pth')
        mmcv.symlink(filename, latest)
def after_train_epoch(self, runner):
    """Optionally sync BatchNorm buffers, then run distributed evaluation.

    When ``self.broadcast_bn_buffer`` is set, the running statistics of
    every tracked BatchNorm module are broadcast from rank 0. Evaluation
    then runs only if ``self.evaluation_flag(runner)`` is truthy; rank 0
    evaluates the collected results and, with ``self.save_best`` set,
    updates the best-score bookkeeping in ``runner.meta['hook_msgs']`` and
    the ``best_<key_indicator>.pth`` link in ``runner.work_dir``.

    Args:
        runner: Object driving training; ``model``, ``work_dir``, ``rank``
            and ``meta`` are read from it.
    """
    # Synchronization of BatchNorm's buffer (running_mean
    # and running_var) is not supported in the DDP of pytorch,
    # which may cause the inconsistent performance of models in
    # different ranks, so we broadcast BatchNorm's buffers
    # of rank 0 to other ranks to avoid this.
    if self.broadcast_bn_buffer:
        for _, module in runner.model.named_modules():
            if not isinstance(module, _BatchNorm):
                continue
            if not module.track_running_stats:
                continue
            dist.broadcast(module.running_var, 0)
            dist.broadcast(module.running_mean, 0)

    if not self.evaluation_flag(runner):
        return

    from mmdet.apis import multi_gpu_test

    collect_dir = self.tmpdir
    if collect_dir is None:
        collect_dir = osp.join(runner.work_dir, '.eval_hook')
    results = multi_gpu_test(
        runner.model,
        self.dataloader,
        tmpdir=collect_dir,
        gpu_collect=self.gpu_collect)

    # Only rank 0 evaluates and does the best-checkpoint bookkeeping.
    if runner.rank != 0:
        return
    print('\n')
    key_score = self.evaluate(runner, results)
    if not self.save_best:
        return

    hook_msgs = runner.meta['hook_msgs']
    best_score = hook_msgs.get('best_score', self.init_value_map[self.rule])
    if not self.compare_func(key_score, best_score):
        return

    best_score = key_score
    hook_msgs['best_score'] = best_score
    last_ckpt = hook_msgs['last_ckpt']
    hook_msgs['best_ckpt'] = last_ckpt
    best_link = osp.join(runner.work_dir, f'best_{self.key_indicator}.pth')
    mmcv.symlink(last_ckpt, best_link)
    self.logger.info(
        f'Now best checkpoint is {last_ckpt}.'
        f'Best {self.key_indicator} is {best_score:0.4f}')
def after_train_epoch(self, runner):
    """Run single-GPU evaluation after a training epoch.

    Returns immediately unless ``self.evaluation_flag(runner)`` is truthy.
    Otherwise the model is tested with ``single_gpu_test`` and evaluated;
    with ``self.save_best`` set and an improved score, the best-score
    bookkeeping in ``runner.meta['hook_msgs']`` and the
    ``best_<key_indicator>.pth`` link in ``runner.work_dir`` are updated.

    Args:
        runner: Object driving training; ``model``, ``work_dir``, ``epoch``
            and ``meta`` are read from it.
    """
    if not self.evaluation_flag(runner):
        return

    from mmdet.apis import single_gpu_test

    results = single_gpu_test(runner.model, self.dataloader, show=False)
    key_score = self.evaluate(runner, results)
    if not self.save_best:
        return

    hook_msgs = runner.meta['hook_msgs']
    best_score = hook_msgs.get('best_score', self.init_value_map[self.rule])
    if not self.compare_func(key_score, best_score):
        return

    best_score = key_score
    hook_msgs['best_score'] = best_score
    last_ckpt = hook_msgs['last_ckpt']
    hook_msgs['best_ckpt'] = last_ckpt
    best_link = osp.join(runner.work_dir, f'best_{self.key_indicator}.pth')
    mmcv.symlink(last_ckpt, best_link)
    self.logger.info(
        f'Now best checkpoint is epoch_{runner.epoch + 1}.pth.'
        f'Best {self.key_indicator} is {best_score:0.4f}')
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='epoch_{}.pth',
                    save_optimizer=True,
                    meta=None,
                    offset=1,
                    iter_no_offset=False):
    """Save the checkpoint together with the training progress counters.

    Args:
        out_dir (str): Directory receiving the checkpoint and the
            ``latest.pth`` link.
        filename_tmpl (str): Template with one placeholder, filled with
            ``self.epoch + offset``.
        save_optimizer (bool): Whether the optimizer is stored as well.
        meta (dict, optional): Extra metadata; updated in place with the
            progress counters. A fresh dict is created when omitted, so
            separate calls never share state.
        offset (int): Value added to the epoch (and, by default, to the
            iteration counters) when recording progress.
        iter_no_offset (bool): If True the iteration counters are stored
            without the offset.
    """
    global_step = get_global_step()
    if meta is None:
        meta = {}
    # when we save checkpoint after the epoch finished, the iter has already plus 1.
    iter_offset = 0 if iter_no_offset else offset
    meta.update(
        epoch=self.epoch + offset,
        iter=self.iter + iter_offset,
        inner_iter=self.inner_iter + iter_offset,
        global_step=global_step,
        batchsize=self.batchsize,
        initial_lr=self.initial_lr)

    filename = osp.join(out_dir, filename_tmpl.format(self.epoch + offset))
    # Link target relative to ``out_dir`` so the link survives moving the
    # directory.
    local_filename = "./{}".format(
        filename_tmpl.format(self.epoch + offset))
    optimizer = self.optimizer if save_optimizer else None
    save_checkpoint(self.model, filename, optimizer=optimizer, meta=meta)
    mmcv.symlink(local_filename, osp.join(out_dir, 'latest.pth'))
def save_checkpoint(self,
                    out_dir,
                    filename_tmpl='epoch_{}.pth',
                    save_optimizer=True,
                    meta=None):
    """Save the model (and the EMA model, if any) for the current epoch.

    Creating the ``latest.pth`` link is best effort: a failure (for
    example on file systems without symlink support) is reported with a
    printed message instead of aborting.

    Args:
        out_dir (str): Directory receiving the checkpoint files.
        filename_tmpl (str): Template with one placeholder for the epoch.
        save_optimizer (bool): Whether the optimizer is stored as well.
        meta (dict, optional): Extra metadata; updated in place with the
            current ``epoch`` and ``iter``.
    """
    if meta is None:
        meta = dict(epoch=self.epoch + 1, iter=self.iter)
    else:
        meta.update(epoch=self.epoch + 1, iter=self.iter)

    filename = filename_tmpl.format(self.epoch + 1)
    filepath = osp.join(out_dir, filename)
    linkpath = osp.join(out_dir, 'latest.pth')
    optimizer = self.optimizer if save_optimizer else None
    save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
    if self.ema_model is not None:
        # The EMA weights are stored without optimizer or meta.
        save_checkpoint(self.ema_model, f'{filepath}-ema.pth')

    # use relative symlink
    try:
        mmcv.symlink(filename, linkpath)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        print('Failed to symlink from {} to {}.'.format(
            filename, linkpath))