def from_resume(mode, args):
    """Rebuild run arguments from a previous run's log directory.

    :param mode: Script name
    :param args: Dict[str, Any], static args
    :return: (args, log_id_path, initial_step_count)
    """
    resume_path = args.resume
    helper = LogDirHelper(resume_path)
    # Restore the args that were written out by the original run, then
    # re-apply the resume path on top of them.
    with open(helper.args_file_path(), "r") as f:
        args = DotDict(json.load(f))
    args.resume = resume_path
    args.load_network = helper.latest_network_path()
    args.load_optim = helper.latest_optim_path()
    initial_step_count = helper.latest_epoch()
    name = args.agent if args.agent else args.actor_host
    log_id = Init.make_log_id(
        args.tag,
        mode,
        name,
        args.netbody,
        timestamp=helper.timestamp(),
    )
    log_id_path = Init.log_id_dir(args.logdir, args.env, log_id)
    return args, log_id_path, initial_step_count
def parse_args():
    """Parse docopt CLI flags into a typed DotDict."""
    from docopt import docopt

    raw = docopt(__doc__)
    cleaned = {}
    # docopt keys look like '--gpu-id'; normalize to attribute-style names.
    for key, value in raw.items():
        cleaned[key.strip('--').replace('-', '_')] = value
    for unused in ('h', 'help'):
        del cleaned[unused]
    args = DotDict(cleaned)
    # Numeric flags arrive as strings; coerce them here.
    args.epoch = int(float(args.epoch))
    args.gpu_id = int(args.gpu_id)
    args.seed = int(args.seed)
    return args
def parse_args():
    """Parse docopt CLI flags into a typed DotDict."""
    from docopt import docopt

    raw = docopt(__doc__)
    normalized = {}
    # Strip leading dashes and convert kebab-case to snake_case.
    for flag, value in raw.items():
        normalized[flag.strip("--").replace("-", "_")] = value
    del normalized["h"]
    del normalized["help"]
    args = DotDict(normalized)
    # Coerce string-valued CLI numbers to their real types.
    args.epoch = int(float(args.epoch))
    args.gpu_id = int(args.gpu_id)
    args.seed = int(args.seed)
    return args
def main(local_args):
    """
    Run distributed training.

    Entry point for one process in a torch.distributed job: restores the
    run's args from the log dir, joins the NCCL process group, and runs
    either the host (local rank 0) or a worker container.

    :param local_args: Dict[str, Any]
    :return:
    """
    log_id_dir = local_args.log_id_dir
    initial_step_count = local_args.initial_step_count
    R.load_extern_classes(log_id_dir)
    logger = Init.setup_logger(log_id_dir, "train{}".format(GLOBAL_RANK))
    helper = LogDirHelper(log_id_dir)
    # Training args are re-read from the run's saved args file; the CLI
    # args only override them when resuming.
    with open(helper.args_file_path(), "r") as args_file:
        args = DotDict(json.load(args_file))
    if local_args.resume:
        args = DotDict({**args, **vars(local_args)})
    # NOTE(review): rank is initialized with LOCAL_RANK while logging uses
    # GLOBAL_RANK — for multi-node runs a globally-unique rank is usually
    # required here; confirm against the launcher's env-var setup.
    dist.init_process_group(
        backend="nccl",
        init_method=args.init_method,
        world_size=WORLD_SIZE,
        rank=LOCAL_RANK,
    )
    logger.info("Rank {} initialized.".format(GLOBAL_RANK))
    # Local rank 0 on each node hosts; every other local rank is a worker.
    if LOCAL_RANK == 0:
        container = DistribHost(
            args,
            logger,
            log_id_dir,
            initial_step_count,
            LOCAL_RANK,
            GLOBAL_RANK,
            WORLD_SIZE,
        )
    else:
        container = DistribWorker(
            args,
            logger,
            log_id_dir,
            initial_step_count,
            LOCAL_RANK,
            GLOBAL_RANK,
            WORLD_SIZE,
        )
    # Always release container resources, even if training raises.
    try:
        container.run()
    finally:
        container.close()
def main(args):
    """
    Run an evaluation training.

    :param args: Dict[str, Any]
    :return:
    """
    args = DotDict(args)
    # Logging objects come first so everything below can report progress.
    Init.print_ascii_logo()
    log = Init.setup_logger(args.logdir, "eval")
    Init.log_args(log, args)
    R.load_extern_classes(args.logdir)
    render_container = RenderContainer(
        args.actor,
        args.epoch,
        args.start,
        args.end,
        log,
        args.logdir,
        args.gpu_id,
        args.seed,
        args.manager,
    )
    # Guarantee cleanup even if rendering raises.
    try:
        render_container.run()
    finally:
        render_container.close()
def from_prompt(args):
    """Interactively prompt for any registry args not already provided."""
    if args.agent:
        agent_args = R.lookup_agent(args.agent).prompt(provided=args)
    else:
        # No single agent: assemble args from host/worker actors, the
        # learner, and the experience replay classes.
        host_cls = R.lookup_actor(args.actor_host)
        worker_cls = R.lookup_actor(args.actor_worker)
        learner_cls = R.lookup_learner(args.learner)
        exp_cls = R.lookup_exp(args.exp)
        agent_args = {
            **host_cls.prompt(args),
            **worker_cls.prompt(args),
            **learner_cls.prompt(args),
            **exp_cls.prompt(args),
        }
    env_cls = R.lookup_env(args.env)
    rwdnorm_cls = R.lookup_reward_normalizer(args.rwd_norm)
    env_args = env_cls.prompt(provided=args)
    rwdnorm_args = rwdnorm_cls.prompt(provided=args)
    if args.custom_network:
        net_args = R.lookup_network(args.custom_network).prompt()
    else:
        net_args = R.prompt_modular_args(args)
    # Later sources win: prompted values override the originals.
    return DotDict(
        {**args, **agent_args, **env_args, **rwdnorm_args, **net_args}
    )
def main(mode, args):
    """Resolve final args, create the log directory, and set up logging.

    :param mode: Script name
    :param args: Dict[str, Any]
    :return: (args, log_id_dir, initial_step, logger)
    """
    args = DotDict(args)
    # Defaults are only applied when not prompting interactively.
    if not args.prompt:
        args = Init.from_defaults(args)
    if args.config:
        args = Init.from_config(args)
    if args.prompt:
        args = Init.from_prompt(args)
    name = args.agent if args.agent else args.actor_host
    log_id = Init.make_log_id(args.tag, mode, name, args.netbody)
    log_id_dir = Init.log_id_dir(args.logdir, args.env, log_id)
    initial_step = 0
    # Resuming replaces everything computed above with the saved run state.
    if args.resume:
        args, log_id_dir, initial_step = Init.from_resume(mode, args)
    Init.print_ascii_logo()
    Init.make_log_dirs(log_id_dir)
    Init.write_args_file(log_id_dir, args)
    logger = Init.setup_logger(log_id_dir)
    Init.log_args(logger, args)
    return args, log_id_dir, initial_step, logger
def main(args):
    """
    Run an evaluation.

    :param args: Dict[str, Any]
    :return:
    """
    args = DotDict(args)
    # Set up console output and logging before any heavy lifting.
    Init.print_ascii_logo()
    log = Init.setup_logger(args.logdir, 'eval')
    Init.log_args(log, args)
    R.load_extern_classes(args.logdir)
    evaluator = EvalContainer(
        args.actor,
        args.epoch,
        log,
        args.logdir,
        args.gpu_id,
        args.nb_episode,
        args.start,
        args.end,
        args.seed,
        args.manager,
    )
    # Guarantee cleanup even if evaluation raises.
    try:
        evaluator.run()
    finally:
        evaluator.close()
def from_defaults(args):
    """Fill in default args from the registered component classes."""
    if args.agent:
        agent_args = R.lookup_agent(args.agent).args
    else:
        # No single agent: merge defaults from host/worker actors, the
        # learner, and the experience replay classes.
        host_cls = R.lookup_actor(args.actor_host)
        worker_cls = R.lookup_actor(args.actor_worker)
        learner_cls = R.lookup_learner(args.learner)
        exp_cls = R.lookup_exp(args.exp)
        agent_args = {
            **host_cls.args,
            **worker_cls.args,
            **learner_cls.args,
            **exp_cls.args,
        }
    env_cls = R.lookup_env(args.env)
    rwdnorm_cls = R.lookup_reward_normalizer(args.rwd_norm)
    env_args = env_cls.args
    rwdnorm_args = rwdnorm_cls.args
    if args.custom_network:
        net_args = R.lookup_network(args.custom_network).args
    else:
        net_args = R.lookup_modular_args(args)
    # Later sources win: component defaults override the provided args.
    return DotDict(
        {**args, **agent_args, **env_args, **rwdnorm_args, **net_args}
    )
def main(args):
    """
    Generate SC2 replays.

    Loads the training args from a finished run, rebuilds the environment
    and agent at a chosen epoch, and runs the agent indefinitely so SC2
    writes replay files.

    :param args: Dict[str, Any]
    :return:
    """
    print_ascii_logo()
    print('Saving replays... Press Ctrl+C to stop.')
    log_dir_helper = LogDirHelper(args.log_id_dir)
    # Replays must be generated with the exact args the network was
    # trained with, so read them back from the run's args file.
    with open(log_dir_helper.args_file_path(), 'r') as args_file:
        train_args = DotDict(json.load(args_file))
    engine = env_registry.lookup_engine(train_args.env)
    assert engine == 'AdeptSC2Env', "replay_gen_sc2.py is only for SC2."
    # construct env
    # Single env so the replay corresponds to one continuous episode
    # stream; replays are written into the chosen epoch's directory.
    env = SubProcEnvManager.from_args(
        train_args,
        seed=args.seed,
        nb_env=1,
        registry=env_registry,
        sc2_replay_dir=log_dir_helper.epoch_path_at_epoch(args.epoch),
        sc2_render=args.render)
    output_space = agent_registry.lookup_output_space(
        train_args.agent, env.action_space)
    # Rebuild the same network architecture used in training.
    if args.custom_network:
        network = net_registry.lookup_custom_net(
            train_args.custom_network).from_args(
                train_args,
                env.observation_space,
                output_space,
                net_registry)
    else:
        network = ModularNetwork.from_args(
            train_args,
            env.observation_space,
            output_space,
            net_registry)
    # create an agent (add act_eval method)
    # Fall back to CPU when no usable GPU id was given.
    device = torch.device(
        "cuda:{}".format(args.gpu_id)
        if (torch.cuda.is_available() and args.gpu_id >= 0)
        else "cpu")
    torch.backends.cudnn.benchmark = True
    agent = agent_registry.lookup_agent(train_args.agent).from_args(
        train_args,
        network,
        device,
        env_registry.lookup_reward_normalizer(train_args.env),
        env.gpu_preprocessor,
        env_registry.lookup_policy(env.engine)(env.action_space),
        nb_env=1)
    # create a rendering container
    # TODO: could terminate after a configurable number of replays instead of running indefinitely
    renderer = ReplayGenerator(agent, device, env)
    # Close the env (and its SC2 subprocess) even on Ctrl+C.
    try:
        renderer.run()
    finally:
        env.close()
def __init__(
    self,
    actor,
    epoch_id,
    start,
    end,
    logger,
    log_id_dir,
    gpu_id,
    seed,
    manager,
    extra_args={},  # NOTE(review): mutable default — shared across calls; safe only if never mutated
):
    """Build an evaluation/render container from a finished run's log dir.

    :param actor: str, registered actor id used for action selection
    :param epoch_id: evaluate only this epoch if truthy, else all epochs
    :param start: float, lowest epoch id to include (inclusive)
    :param end: float, highest epoch id to include; -1.0 means no upper bound
    :param logger: logger instance
    :param log_id_dir: str, path to the run's log directory
    :param gpu_id: int, CUDA device id (negative presumably means CPU — confirm)
    :param seed: int, env seed
    :param manager: str, registered env manager id
    :param extra_args: Dict[str, Any], overrides merged into train_args
    """
    self.log_dir_helper = log_dir_helper = LogDirHelper(log_id_dir)
    self.train_args = train_args = log_dir_helper.load_args()
    # NOTE(review): extra_args are merged into self.train_args only; the
    # local `train_args` alias below still refers to the UN-merged args,
    # so overrides are ignored by lookup_engine/lookup_env/_init_network.
    # Confirm whether that is intentional.
    self.train_args = DotDict({**self.train_args, **extra_args})
    self.device = device = self._device_from_gpu_id(gpu_id)
    self.logger = logger
    # Select which saved epochs to evaluate.
    if epoch_id:
        epoch_ids = [epoch_id]
    else:
        epoch_ids = self.log_dir_helper.epochs()
        epoch_ids = filter(lambda eid: eid >= start, epoch_ids)
        if end != -1.0:
            epoch_ids = filter(lambda eid: eid <= end, epoch_ids)
        epoch_ids = list(epoch_ids)
    self.epoch_ids = epoch_ids
    engine = REGISTRY.lookup_engine(train_args.env)
    env_cls = REGISTRY.lookup_env(train_args.env)
    manager_cls = REGISTRY.lookup_manager(manager)
    # Single env: evaluation runs one episode stream at a time.
    self.env_mgr = manager_cls.from_args(
        self.train_args, engine, env_cls, seed=seed, nb_env=1)
    if train_args.agent:
        agent = train_args.agent
    else:
        agent = train_args.actor_host
    output_space = REGISTRY.lookup_output_space(
        agent, self.env_mgr.action_space)
    actor_cls = REGISTRY.lookup_actor(actor)
    # NOTE(review): prompt() is called with no `provided` args — verify
    # this does not trigger an interactive prompt during evaluation.
    self.actor = actor_cls.from_args(
        actor_cls.prompt(), self.env_mgr.action_space)
    self.network = self._init_network(
        train_args,
        self.env_mgr.observation_space,
        self.env_mgr.gpu_preprocessor,
        output_space,
        REGISTRY,
    ).to(device)
def parse_args():
    """Parse docopt CLI flags into a typed DotDict."""
    from docopt import docopt

    cli = docopt(__doc__)
    cli = {flag.strip('--').replace('-', '_'): val for flag, val in cli.items()}
    del cli['h']
    del cli['help']
    args = DotDict(cli)
    args.logdir = parse_path(args.logdir)
    # TODO implement Option utility
    # epoch is optional: keep the parsed None when absent.
    maybe_epoch = parse_none(args.epoch)
    args.epoch = int(float(maybe_epoch)) if maybe_epoch else maybe_epoch
    args.gpu_id = int(args.gpu_id)
    args.nb_episode = int(args.nb_episode)
    args.start = float(args.start)
    args.end = float(args.end)
    args.seed = int(args.seed)
    return args
def parse_args():
    """Parse docopt CLI flags into a typed DotDict."""
    from docopt import docopt

    cli = docopt(__doc__)
    cli = {flag.strip("--").replace("-", "_"): val for flag, val in cli.items()}
    del cli["h"]
    del cli["help"]
    args = DotDict(cli)
    args.logdir = parse_path(args.logdir)
    # TODO implement Option utility
    # epoch is optional: keep the parsed None when absent.
    maybe_epoch = parse_none(args.epoch)
    args.epoch = int(float(maybe_epoch)) if maybe_epoch else maybe_epoch
    args.start = int(float(args.start))
    args.end = int(float(args.end))
    args.gpu_id = int(args.gpu_id)
    args.seed = int(args.seed)
    return args
def main(args):
    """
    Run distributed training.

    Launcher process: computes the world size, exports the rendezvous
    env vars, spawns one `_distrib` subprocess per local rank, waits for
    them all, then optionally evaluates the result.

    :param args: Dict[str, Any]
    :return:
    """
    args = DotDict(args)
    dist_world_size = args.nb_proc * args.nb_node
    # Child processes inherit the rendezvous info through the environment.
    current_env = os.environ.copy()
    current_env["MASTER_ADDR"] = args.master_addr
    current_env["MASTER_PORT"] = str(args.master_port)
    current_env["WORLD_SIZE"] = str(dist_world_size)
    args, log_id_dir, initial_step, logger = Init.main(MODE, args)
    R.save_extern_classes(log_id_dir)
    processes = []
    for local_rank in range(0, args.nb_proc):
        # each process's rank
        dist_rank = args.nb_proc * args.node_rank + local_rank
        current_env["RANK"] = str(dist_rank)
        current_env["LOCAL_RANK"] = str(local_rank)
        # spawn the processes
        # Resume runs need extra flags so children can reload state.
        if not args.resume:
            cmd = [
                sys.executable,
                "-u",
                "-m",
                "adept.scripts._distrib",
                "--log-id-dir={}".format(log_id_dir),
            ]
        else:
            cmd = [
                sys.executable,
                "-u",
                "-m",
                "adept.scripts._distrib",
                "--log-id-dir={}".format(log_id_dir),
                "--resume={}".format(True),
                "--load-network={}".format(args.load_network),
                "--load-optim={}".format(args.load_optim),
                "--initial-step-count={}".format(initial_step),
                "--init-method={}".format(args.init_method),
            ]
        if args.custom_network:
            cmd += ["--custom-network", args.custom_network]
        process = subprocess.Popen(cmd, env=current_env)
        processes.append(process)
    # Block until every rank exits. NOTE(review): return codes are not
    # checked, so a failed rank goes unnoticed here — confirm intended.
    for process in processes:
        process.wait()
    # Optional post-training evaluation on GPU 0.
    if args.eval:
        from adept.scripts.evaluate import main

        eval_args = {
            "log_id_dir": log_id_dir,
            "gpu_id": 0,
            "nb_episode": 30,
        }
        if args.custom_network:
            eval_args["custom_network"] = args.custom_network
        main(eval_args)
def parse_args():
    """Parse docopt CLI flags into a typed DotDict.

    When resuming, only the resume path is parsed; every other flag is
    taken from the original run's saved args.
    """
    from docopt import docopt

    raw = docopt(__doc__)
    raw = {key.strip("--").replace("-", "_"): val for key, val in raw.items()}
    del raw["h"]
    del raw["help"]
    args = DotDict(raw)
    args.nb_node = int(args.nb_node)
    args.node_rank = int(args.node_rank)
    args.nb_proc = int(args.nb_proc)
    args.master_port = int(args.master_port)
    # Resume short-circuits: remaining flags come from the saved run.
    if args.resume:
        args.resume = parse_path(args.resume)
        return args
    if args.config:
        args.config = parse_path(args.config)
    args.logdir = parse_path(args.logdir)
    args.nb_env = int(args.nb_env)
    args.seed = int(args.seed)
    args.nb_step = int(float(args.nb_step))
    args.tag = parse_none(args.tag)
    args.nb_eval_env = int(args.nb_eval_env)
    args.summary_freq = int(args.summary_freq)
    args.lr = float(args.lr)
    args.epoch_len = int(float(args.epoch_len))
    args.profile = bool(args.profile)
    return args
def parse_args():
    """Parse docopt CLI flags into a typed DotDict.

    When resuming, only the resume path is parsed; every other flag is
    taken from the original run's saved args.

    :return: DotDict of parsed, type-coerced args
    :raises AssertionError: if nb_learn_batch exceeds nb_workers
    """
    from docopt import docopt
    args = docopt(__doc__)
    # docopt keys look like '--nb-workers'; normalize to snake_case names.
    args = {k.strip('--').replace('-', '_'): v for k, v in args.items()}
    del args['h']
    del args['help']
    args = DotDict(args)
    # Ignore other args if resuming
    if args.resume:
        args.resume = parse_path(args.resume)
        return args
    if args.config:
        args.config = parse_path(args.config)
    args.logdir = parse_path(args.logdir)
    # Numeric flags arrive as strings; coerce them here.
    args.nb_env = int(args.nb_env)
    args.seed = int(args.seed)
    args.nb_step = int(float(args.nb_step))
    args.tag = parse_none(args.tag)
    args.summary_freq = int(args.summary_freq)
    args.lr = float(args.lr)
    args.epoch_len = int(float(args.epoch_len))
    args.profile = bool(args.profile)
    args.ray_addr = parse_none(args.ray_addr)
    args.nb_learners = int(args.nb_learners)
    args.nb_workers = int(args.nb_workers)
    args.learner_cpu_alloc = int(args.learner_cpu_alloc)
    args.learner_gpu_alloc = float(args.learner_gpu_alloc)
    args.worker_cpu_alloc = int(args.worker_cpu_alloc)
    args.worker_gpu_alloc = float(args.worker_gpu_alloc)
    args.nb_learn_batch = int(args.nb_learn_batch)
    args.rollout_queue_size = int(args.rollout_queue_size)
    # arg checking
    # Fix: the old message read "Got {} <= {}" which printed the violating
    # values as if the constraint held, and mislabeled a fatal check as a
    # WARNING. State the actual values instead.
    assert args.nb_learn_batch <= args.nb_workers, (
        'nb_learn_batch must be <= nb_workers. '
        'Got nb_learn_batch={}, nb_workers={}'.format(
            args.nb_learn_batch, args.nb_workers
        )
    )
    return args
def parse_args():
    """Parse docopt CLI flags into a typed DotDict.

    When resuming, only the resume path is parsed; every other flag is
    taken from the original run's saved args.
    """
    from docopt import docopt

    raw = docopt(__doc__)
    raw = {key.strip('--').replace('-', '_'): val for key, val in raw.items()}
    del raw['h']
    del raw['help']
    args = DotDict(raw)
    # Ignore other args if resuming
    if args.resume:
        args.resume = parse_path(args.resume)
        return args
    if args.config:
        args.config = parse_path(args.config)
    args.logdir = parse_path(args.logdir)
    args.gpu_id = int(args.gpu_id)
    args.nb_env = int(args.nb_env)
    args.seed = int(args.seed)
    args.nb_step = int(float(args.nb_step))
    args.tag = parse_none(args.tag)
    args.nb_eval_env = int(args.nb_eval_env)
    args.summary_freq = int(args.summary_freq)
    args.lr = float(args.lr)
    args.warmup = int(float(args.warmup))
    args.epoch_len = int(float(args.epoch_len))
    args.profile = bool(args.profile)
    return args
def from_config(args):
    """Overlay values from the JSON config file onto the provided args."""
    with open(args.config, "r") as config_file:
        overrides = json.load(config_file)
    # Config values win over anything already in args.
    return DotDict({**args, **overrides})
def load_args(self):
    """Read this run's saved args file and return it as a DotDict."""
    with open(self.args_file_path()) as f:
        contents = json.load(f)
    return DotDict(contents)