import sys
import warnings

# mp.spawn with an `nprocs` argument matches torch.multiprocessing, so that
# import is assumed here; Args, main_worker, and utils are project-local modules.
import torch.multiprocessing as mp


def main():
    args = Args.from_args()
    if args.debug:
        pass  # debug mode may run with a single process
    elif args.world_size < 2:
        warnings.warn('World size must be larger than 1')
        sys.exit(1)
    if args.seed is not None:
        utils.reproduction.initialize_seed(args.seed)
    utils.environment.ulimit_n_max()
    # Run in the main process to avoid concurrency conflicts.
    args.resolve_continue()
    args.make_run_dir()
    args.save()
    utils.pack_code(args.run_dir)
    free_port = utils.distributed.find_free_port()
    dist_url = f'tcp://127.0.0.1:{free_port}'
    print(f'world_size={args.world_size} Using dist_url={dist_url}')
    # Drop the argparse parser so `args` can be pickled for spawn.
    args.parser = None
    # Only single-node training is supported; world_size is the number
    # of processes.
    mp.spawn(main_worker, args=(args, dist_url), nprocs=args.world_size)
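# For reference, mp.spawn invokes its target as fn(rank, *args), so the worker
# receives the process index as its first argument. Below is a minimal sketch
# of a compatible main_worker, assuming single-node GPU training with the NCCL
# backend; the project's real main_worker is defined elsewhere.
import torch
import torch.distributed as dist


def main_worker(local_rank, args, dist_url):
    # On a single node, the spawn index doubles as the global rank.
    dist.init_process_group(
        backend='nccl',
        init_method=dist_url,
        world_size=args.world_size,
        rank=local_rank,
    )
    torch.cuda.set_device(local_rank)
    # ... build the model, wrap it in DistributedDataParallel, and train.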
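# mp.spawn uses the 'spawn' start method, which re-imports this module in each
# child process, so the entry point must be guarded.
if __name__ == '__main__':
    main()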