# Argument set with an integer option, a variadic integer argument declared
# without option strings, and a repeatable string option.
class Args(ta.TypedArgs):
    # --foo is converted with int and falls back to 42.
    foo: int = ta.add_argument(
        '--foo',
        type=int,
        default=42,
    )
    # No option strings; nargs='*' accepts zero or more values,
    # with [1, 2, 3] as the default.
    bar: List[int] = ta.add_argument(
        nargs='*',
        default=[1, 2, 3],
    )
    # --config uses action='append', starting from an empty list.
    config: List[str] = ta.add_argument(
        '--config',
        default=[],
        type=str,
        action='append',
    )
# One attribute fed by two flags: --str and --int each use
# action='append_const', with the `str` and `int` type objects as constants.
class Args5(TypedArgs):
    types: List[Union[str, int]] = (
        add_argument('--str', action='append_const', const=str),
        add_argument('--int', action='append_const', const=int),
    )
# Argument set mixing a plain class-level default with three declared
# arguments (dataset path, architecture name, worker count).
class Args(ta.TypedArgs):
    # Plain attribute; not declared through add_argument.
    foo: str = 'bar'

    # Declared without option strings; shown as DIR in help output.
    data: str = ta.add_argument(
        metavar='DIR',
        help='path to dataset',
    )
    # -a / --arch, 'resnet18' when omitted.
    arch: str = ta.add_argument(
        '-a', '--arch',
        metavar='ARCH',
        default='resnet18',
        help='model architecture (default: resnet18)',
    )
    # -j / --workers, 4 when omitted.
    num_workers: int = ta.add_argument(
        '-j', '--workers',
        default=4,
        metavar='N',
        help='number of data loading workers (default: 4)',
    )
# Training arguments layered on BaseArgs, plus the logic that resolves
# `--continue` into a concrete config file and checkpoint.
class Args(BaseArgs):
    load_checkpoint: Optional[Path] = add_argument(
        '--load-checkpoint',
        required=False,
        help='path to the checkpoint file to be loaded',
    )
    load_model: Optional[Path] = add_argument(
        '--load-model',
        required=False,
        help='path to the checkpoint file to be loaded, but only load model.',
    )
    validate: bool = add_argument(
        '--validate',
        action='store_true',
        help='Only run final validate then exit',
    )
    moco_checkpoint: Optional[str] = add_argument(
        '--mc', '--moco-checkpoint',
        help='load moco checkpoint',
    )
    seed: Optional[int] = add_argument(
        '--seed',
        help='random seed',
    )
    # The default is evaluated once, when the class body runs.
    world_size: int = add_argument(
        '--ws', '--world-size',
        default=torch.cuda.device_count(),
        help='total processes',
    )
    # Leading underscore because `continue` is a Python keyword.
    _continue: bool = add_argument(
        '--continue',
        action='store_true',
        help='Use previous config and checkpoint',
    )
    no_scale_lr: bool = add_argument(
        '--no-scale-lr',
        action='store_true',
        help='Do not change lr according to batch size',
    )

    def resolve_continue(self):
        """Fill in ``config`` and ``load_checkpoint`` when ``--continue`` is set.

        Does nothing unless ``_continue`` is true. When ``config`` is unset,
        the experiment directory is scanned for run directories matching
        ``RUN_DIR_NAME_REGEX``; the one with the highest run id that contains
        a ``config.json`` supplies the config. When ``load_checkpoint`` is
        unset, ``checkpoint.pth.tar`` in the experiment directory is used if
        it exists; otherwise a warning is logged.

        Raises:
            EnvironmentError: the experiment directory does not exist, or no
                matching run directory holds a ``config.json``.
        """
        if not self._continue:
            return

        if not self.experiment_dir.exists():
            raise EnvironmentError(
                f'Experiment directory "{self.experiment_dir}" does not exists.'
            )

        if self.config is None:
            latest_id = -1
            for entry in self.experiment_dir.iterdir():
                matched = self.RUN_DIR_NAME_REGEX.match(entry.name)
                if matched is None:
                    continue
                candidate_id = int(matched.group(1))
                # Only newer run ids that are real directories are considered.
                if candidate_id <= latest_id or not entry.is_dir():
                    continue
                candidate_config = entry / 'config.json'
                if candidate_config.exists():
                    latest_id = candidate_id
                    self.config = candidate_config

            if self.config is None:
                raise EnvironmentError('No previous run config found')
            logger.info('Continue using previous config: "%s"', self.config)

        if self.load_checkpoint is not None:
            return

        checkpoint_path = self.experiment_dir / 'checkpoint.pth.tar'
        if not checkpoint_path.exists():
            logger.warning('No previous checkpoint found')
            return

        self.load_checkpoint = checkpoint_path
        logger.info('Continue using previous checkpoint: "%s"',
                    self.load_checkpoint)
# Argument set with a plain default, three declared arguments, and a custom
# parser factory.
class Args(TypedArgs):
    # Plain attribute; not declared through add_argument.
    foo: str = 'bar'

    # Declared without option strings; shown as DIR in help output.
    data: str = add_argument(
        metavar='DIR',
        help='path to dataset',
    )
    # -a / --arch, 'resnet18' when omitted.
    arch: str = add_argument(
        '-a', '--arch',
        metavar='ARCH',
        default='resnet18',
        help='model architecture (default: resnet18)',
    )
    # -j / --workers, 4 when omitted.
    num_workers: int = add_argument(
        '-j', '--workers',
        default=4,
        metavar='N',
        help='number of data loading workers (default: 4)',
    )

    def parser_factory(self):
        """Return a fresh ``argparse.ArgumentParser`` whose prog is 'PROG'."""
        parser = argparse.ArgumentParser('PROG')
        return parser
# Base arguments carrying a device id, plus helpers exposing the device.
class BaseArgs(Base):
    device_id: int = ta.add_argument(
        '--device-id',
        type=int,
        default=DEFAULT_DEVICE_ID,  # -1 for cpu
    )

    @property
    def device(self) -> flow.device:
        """Return the CUDA device whose index is ``LOCAL_RANK``.

        ``device_id`` is not consulted here. An earlier variant, previously
        kept as commented-out code, returned the CPU device for a negative
        ``device_id`` and ``cuda:<device_id>`` otherwise.
        """
        return flow.device(f'cuda:{LOCAL_RANK}')

    def is_cuda_available(self) -> bool:
        """Return True when ``device_id`` is greater than -1."""
        uses_cuda = self.device_id > -1
        return uses_cuda
# An option, one argument declared with no parameters, and a trailing
# argument that collects the remainder of the command line.
class Args(TypedArgs):
    foo: str = add_argument('--foo')
    # Declared with no option strings and no keyword arguments.
    command: str = add_argument()
    # nargs=argparse.REMAINDER gathers all remaining command-line arguments.
    args: List[str] = add_argument(
        nargs=argparse.REMAINDER,
    )
# Single argument without option strings that needs at least one value.
class Args(TypedArgs):
    foo: List[str] = add_argument(
        nargs='+',
    )
# Three zero-or-more (nargs='*') arguments: two options and one declared
# without option strings.
class Args(TypedArgs):
    foo: List[str] = add_argument(
        '--foo',
        nargs='*',
    )
    bar: List[str] = add_argument(
        '--bar',
        nargs='*',
    )
    baz: List[str] = add_argument(
        nargs='*',
    )
# An option with short and long spellings, and one argument declared with
# no parameters.
class Args(TypedArgs):
    foo: Optional[str] = add_argument(
        '-f', '--foo',
    )
    bar: str = add_argument()
# Two zero-or-one (nargs='?') arguments.
class Args(TypedArgs):
    # --foo carries both a const ('c') and a default ('d').
    foo: str = add_argument(
        '--foo',
        nargs='?',
        const='c',
        default='d',
    )
    # Declared without option strings; default is 'd'.
    bar: str = add_argument(
        nargs='?',
        default='d',
    )
# Distributed-training arguments and helpers built on torch.distributed.
# Reads `self.gpu` and `self.experiment_dir`, which are not declared in
# this class.
class BaseArgs(Base):
    rank_start: int = ta.add_argument(
        '--rank-start',
        type=int,
        default=0,
        help='',
    )
    # NOTE(review): annotated `str` but the default is None; `dist_backend`
    # below handles the None case.
    _dist_backend: str = ta.add_argument(
        "--dist-backend",
        type=str,
        choices=["nccl", "gloo"],
        default=None,
    )
    world_size: int = ta.add_argument(
        '--world-size',
        type=int,
        default=1,
    )
    dist_url: Optional[str] = ta.add_argument(
        "--dist-url",
        type=str,
    )

    @property
    def device(self) -> torch.device:
        """Return the CUDA device when ``self.gpu`` is truthy and CUDA is
        available, otherwise the CPU device."""
        use_cuda = self.gpu and torch.cuda.is_available()
        return torch.device('cuda') if use_cuda else torch.device('cpu')

    @property
    def dist_backend(self) -> str:
        """Return the explicitly chosen backend, or infer one.

        When ``--dist-backend`` was not given, "nccl" is selected if
        ``self.gpu`` is truthy and NCCL is available, else "gloo"; the
        inferred choice is logged.
        """
        chosen = self._dist_backend
        if chosen is not None:
            return chosen

        chosen = "nccl" if self.gpu and dist.is_nccl_available() else "gloo"
        _logger.info("infer dist_backend: %s", chosen)
        return chosen

    def init_process_group_from_file(self, local_rank: int) -> int:
        """Initialise the process group via the module-level helper of the
        same name, using ``<experiment_dir>/dist_init``.

        Returns the global rank, ``rank_start + local_rank``.
        """
        global_rank = self.rank_start + local_rank
        init_file = self.experiment_dir / 'dist_init'
        init_process_group_from_file(
            self.dist_backend,
            init_file,
            world_size=self.world_size,
            rank=global_rank,
        )
        return global_rank

    def init_process_group_from_tcp(self, local_rank: int) -> int:
        """Initialise the process group with ``dist_url`` as the init method.

        Asserts that ``dist_url`` is set. Returns the global rank,
        ``rank_start + local_rank``.
        """
        assert self.dist_url
        global_rank = self.rank_start + local_rank
        dist.init_process_group(
            self.dist_backend,
            init_method=self.dist_url,
            world_size=self.world_size,
            rank=global_rank,
        )
        return global_rank

    def try_cuda_set_device(self, local_rank: int):
        """Select ``self.gpu[local_rank]`` as the current CUDA device.

        Does nothing when ``self.gpu`` is falsy.
        """
        if not self.gpu:
            return
        torch.cuda.set_device(self.gpu[local_rank])
# Two boolean flags and two integer-annotated options with defaults.
class Args(TypedArgs):
    dlp: bool = add_argument(
        '--dlp',
        action='store_true',
    )
    tfds: bool = add_argument(
        '--tfds',
        action='store_true',
    )
    # -b / --batch-size, 128 when omitted.
    batch_size: int = add_argument(
        '-b', '--batch-size',
        default=128,
    )
    # -n / --num-workers, 2 when omitted.
    num_workers: int = add_argument(
        '-n', '--num-workers',
        default=2,
    )
# Common experiment arguments (config file, experiment/temp directories,
# logging and debug switches, GPU selection) together with helpers that
# create the experiment directory and record the config and launch command.
class BaseArgs(ta.TypedArgs):
    config_file: Optional[Path] = ta.add_argument(
        "-c", "--config", type=Path, help="config 文件"
    )
    experiment_dir: Path = ta.add_argument(
        "-e", "--experiment-dir", type=Path,
        default=Path("exps/000"), help="实验目录"
    )
    # Repeatable; every occurrence is appended to the list.
    apply: List[str] = ta.add_argument(
        "-a", "--apply", type=str, action="append", default=[],
        help="额外 config,可 merge 到 main config",
    )
    print_freq: int = ta.add_argument(
        "--print-freq", "--pf", type=int, default=1000,
        help="显示 log 的频率,一般为10"
    )
    temp_dir: Path = ta.add_argument(
        "--temp-dir", type=Path, default=Path("temp"),
        help="临时目录,记得定期删"
    )
    debug: bool = ta.add_argument(
        "-d", "--debug", action="store_true", help="debug 模式")
    no_tqdm: bool = ta.add_argument(
        "--no-tqdm", action="store_true", help="关闭 tqdm")
    resume: Optional[Path] = ta.add_argument(
        "--resume", type=Path, help="resume checkpoint path"
    )
    force: bool = ta.add_argument(
        "-f", "--force", action="store_true",
        help="移除旧实验目录到 temp dir,强制创建新实验目录"
    )

    # CPU is used by default; XLA support may be added later.
    gpu: List[int] = ta.add_argument(
        "--gpu", type=parse_gpu_list, default=[],
        help="指定gpu,`1,2,5-7 -> [1,2,5,6,7]`"
    )

    def try_make_experiment_dir(self):
        """Create the experiment directory, handling a pre-existing one.

        If the directory already exists and ``force`` is set, it is renamed
        into ``temp_dir`` with a timestamp suffix. If it exists and ``force``
        is not set, a message is printed and the process exits with status 0.
        The directory is then created with ``exist_ok=False``.
        """
        if self.experiment_dir.exists():
            if self.force:
                timestamp = datetime.now().strftime('%Y_%m_%d-%H.%M.%S')
                new_experiment_name = self.experiment_dir.name + '-' + timestamp
                new_experiment_dir = self.temp_dir / new_experiment_name
                print(
                    f"move old experiment dir from {self.experiment_dir} to {new_experiment_dir}"
                )
                # Make sure the temp dir exists before moving into it.
                self.temp_dir.mkdir(parents=True, exist_ok=True)
                self.experiment_dir.rename(new_experiment_dir)
            else:
                print(
                    f'实验目录 {self.experiment_dir} 已存在,可使用 -f/--force 参数覆盖实验目录')
                # sys.exit rather than the `exit` helper, which the site
                # module only installs for interactive convenience.
                sys.exit(0)

        self.experiment_dir.mkdir(parents=True, exist_ok=False)

    def config(self) -> dict:
        """Parse ``config_file`` together with the ``apply`` extras and
        return the result of ``from_snippet``.

        Asserts that ``config_file`` is set.
        """
        assert self.config_file, "请指定 config file"
        snippet = parse_config(self.config_file, self.apply)
        config = from_snippet(snippet)
        return config

    def save_config(self, config: Optional[dict] = None, name: str = "config.json"):
        """Dump ``config`` as JSON to ``experiment_dir / name``.

        A falsy ``config`` (``None`` or empty) is replaced by
        ``self.config()``.
        """
        if not config:
            config = self.config()
        dump_to_json(config, self.experiment_dir / name)

    def save_command(self, name: str = 'run.sh'):
        """Write a shell script to ``experiment_dir / name`` that re-runs the
        current command.

        The script changes to the current working directory, re-exports
        ``CUDA_VISIBLE_DEVICES`` when it is set, and invokes the current
        interpreter with the original ``sys.argv``. The interpreter path is
        written directly and shell-quoted: an ``alias`` line is not expanded
        by non-interactive bash, and an unquoted path breaks on whitespace.
        """
        with open(self.experiment_dir / name, 'w') as f:
            f.write(f"cd {quote(os.getcwd())}\n")

            envs = ['CUDA_VISIBLE_DEVICES']
            for env in envs:
                value = os.environ.get(env, None)
                if value is not None:
                    f.write(f'export {env}={quote(value)}\n')

            args_str = ' '.join(quote(arg) for arg in sys.argv)
            f.write(f'{quote(sys.executable)} {args_str}\n')

        _logger.info('save command to %s', self.experiment_dir / name)
# Boolean flags: one store_true and two store_false.
class Args3(TypedArgs):
    foo: bool = add_argument(
        '--foo',
        action='store_true',
    )
    bar: bool = add_argument(
        '--bar',
        action='store_false',
    )
    baz: bool = add_argument(
        '--baz',
        action='store_false',
    )
# Single -c / --config option with no default given.
class Args(TypedArgs):
    config: Optional[str] = add_argument(
        '-c', '--config',
    )
# Single -c / --config option whose default is '/path'.
class Args(TypedArgs):
    config: str = add_argument(
        '-c', '--config',
        default='/path',
    )
# An option with short and long spellings, and one argument declared with
# no parameters.
class Args(ta.TypedArgs):
    foo: str = ta.add_argument(
        '-f', '--foo',
    )
    bar: str = ta.add_argument()
# Single --foo option with no default given.
class Args1(TypedArgs):
    foo: Optional[str] = add_argument(
        '--foo',
    )
# Fixed-count arguments: --foo takes exactly two values, and bar (declared
# without option strings) takes exactly one.
class Args(TypedArgs):
    foo: List[str] = add_argument(
        '--foo',
        nargs=2,
    )
    bar: List[str] = add_argument(
        nargs=1,
    )
# --foo uses action='store_const' with the constant 42.
class Args2(TypedArgs):
    foo: int = add_argument(
        '--foo',
        action='store_const',
        const=42,
    )
# Launcher arguments: rank/world-size and master address settings, a few
# switches, then the training script followed by everything passed to it.
class Args(ta.TypedArgs):
    rank_start: int = ta.add_argument(
        '--rank-start',
        type=int,
        default=0,
    )
    world_size: int = ta.add_argument(
        '--world-size',
        type=int,
        default=1,
    )
    master_addr: str = ta.add_argument(
        '--master-addr',
        type=str,
        default='127.0.0.1',
    )
    master_port: int = ta.add_argument(
        '--master-port',
        type=int,
        default=29500,
    )
    redirect_stdout_and_stderr: bool = ta.add_argument(
        '--redirect-stdout-and-stderr',
        action='store_true',
    )

    # CPU is used by default; XLA support may be added later.
    gpu: List[int] = ta.add_argument(
        "--gpu",
        type=parse_gpu_list,
        default=[],
        help="指定gpu,`1,2,5-7 -> [1,2,5,6,7]`",
    )

    debug: bool = ta.add_argument(
        '-d', '--debug',
        action='store_true',
    )
    no_python: bool = ta.add_argument(
        '--no-python',
        action='store_true',
    )
    module: bool = ta.add_argument(
        '-m', '--module',
        action='store_true',
    )

    # Declared without option strings.
    training_script: str = ta.add_argument(
        type=str,
    )
    # REMAINDER collects all remaining command-line arguments.
    training_script_args: List[str] = ta.add_argument(
        nargs=REMAINDER,
    )
# --foo uses action='append'.
class Args4(TypedArgs):
    foo: List[str] = add_argument(
        '--foo',
        action='append',
    )
# Subclass of Args that declares a --foo option.
class Args1(Args):
    foo: str = add_argument(
        '--foo',
    )
# --verbose / -v uses action='count', starting from 0.
class Args(TypedArgs):
    verbose: int = add_argument(
        '--verbose', '-v',
        action='count',
        default=0,
    )
# An option with no default given, and a variadic integer argument declared
# without option strings.
class Args(ta.TypedArgs):
    foo: Optional[str] = ta.add_argument(
        '--foo',
    )
    # nargs='*' accepts zero or more values; default is [1, 2, 3].
    bar: List[int] = ta.add_argument(
        nargs='*',
        default=[1, 2, 3],
    )
# Experiment arguments plus helpers that allocate a numbered run directory
# inside the experiment directory and record the launch command.
class Args(TypedArgs):
    config: Optional[str] = add_argument(
        '-c', '--config',
        help='path to config',
    )
    ext_config: List[str] = add_argument(
        '-x', '--ext-config',
        nargs='*',
        default=[],
        help='Extra jsonnet config',
    )
    debug: bool = add_argument(
        '-d', '--debug',
        action='store_true',
        help='debug flag',
    )
    # The const value, including its timestamp, is computed once when the
    # class body is evaluated.
    experiment_dir: Optional[Path] = add_argument(
        '-e', '--experiment-dir',
        const=Path('temp') / get_timestamp(),
        nargs=argparse.OPTIONAL,
        help='experiment dir',
    )
    # Backing field for the `run_dir` property.
    _run_dir: Optional[Path] = add_argument('--run-dir')

    def __repr__(self):
        """Pretty-print the instance attributes, leaving out ``parser``."""
        d = self.__dict__.copy()
        # Tolerate a missing 'parser' entry so repr() never raises.
        d.pop('parser', None)
        return pformat(d)

    def save(self):
        """Write ``run.sh`` into ``run_dir`` so the command can be re-run.

        The script changes to the current working directory, re-exports
        ``CUDA_VISIBLE_DEVICES`` when it is set, and invokes the current
        interpreter with the original ``sys.argv``. Every word, including
        the interpreter path, is shell-quoted.
        """
        with open(self.run_dir / 'run.sh', 'w') as f:
            f.write(f'cd {quote(os.getcwd())}\n')

            envs = ['CUDA_VISIBLE_DEVICES']
            for env in envs:
                value = os.environ.get(env, None)
                if value is not None:
                    f.write(f'export {env}={quote(value)}\n')

            command = ' '.join(
                quote(word) for word in [sys.executable, *sys.argv])
            f.write(command + '\n')

    # Matches run directory names and captures the numeric run id.
    # Raw string so that `\d` is not treated as a string escape.
    RUN_DIR_NAME_REGEX = re.compile(r'^run_(\d+)_')

    @property
    def run_dir(self):
        """Return the run directory path, allocating it on first access.

        When ``experiment_dir`` is set and no run dir has been chosen yet,
        the next run id is one more than the highest id found among existing
        entries of ``experiment_dir`` (0 if there are none), and the result
        ``run_<id>_<timestamp>`` is cached in ``_run_dir``. Otherwise the
        current ``_run_dir`` is returned unchanged.
        """
        if self.experiment_dir is not None and self._run_dir is None:
            run_id = -1
            if self.experiment_dir.exists():
                for previous_runs in self.experiment_dir.iterdir():
                    match = self.RUN_DIR_NAME_REGEX.match(previous_runs.name)
                    if match is not None:
                        run_id = max(int(match.group(1)), run_id)
            run_id += 1
            self._run_dir = (
                self.experiment_dir / f'run_{run_id}_{get_timestamp()}'
            )
        return self._run_dir

    def make_run_dir(self):
        """Create ``run_dir``, creating ``experiment_dir`` first if it is set.

        Raises:
            EnvironmentError: the run dir exists and the user declined to
                remove it.
        """
        if self.experiment_dir is not None:
            self.experiment_dir.mkdir(parents=True, exist_ok=True)

        if not self.ask_for_replacing_older_dir(self.run_dir):
            raise EnvironmentError(f'Run dir "{self.run_dir}" exists')

        self.run_dir.mkdir(parents=True, exist_ok=False)

    def make_experiment_dir(self):
        """Offer to replace an existing ``experiment_dir``, then create
        ``run_dir`` (with parents).

        Raises:
            EnvironmentError: the experiment dir exists and the user declined
                to remove it.
        """
        if not self.ask_for_replacing_older_dir(self.experiment_dir):
            raise EnvironmentError(
                f'Experiment dir "{self.experiment_dir}" exists')

        self.run_dir.mkdir(parents=True, exist_ok=False)

    def ask_for_replacing_older_dir(self, dir_to_be_replaced: Path) -> bool:
        """Interactively ask whether an existing directory may be removed.

        Returns True when the path does not exist, or when the user answers
        'y' / 'yes' (the directory tree is then deleted). Returns False for
        any other answer.
        """
        if not dir_to_be_replaced.exists():
            return True

        print(
            f'File exists: {dir_to_be_replaced}\nDo you want to remove it and create a new one?'
        )
        choice = input('Remove older directory? [y]es/[n]o: ')

        if choice in ['y', 'yes']:
            shutil.rmtree(dir_to_be_replaced)
            return True

        return False
# Single --no-python option annotated as bool.
# NOTE(review): no `action` is given, so whether this behaves as a
# value-less flag depends on how the library treats the bool annotation;
# confirm.
class Args(ta.TypedArgs):
    no_python: bool = ta.add_argument(
        '--no-python',
    )