def main(args):
    """
    Run a render of a trained model.

    :param args: Dict[str, Any]
    :return:
    """
    # construct logging objects
    args = DotDict(args)

    Init.print_ascii_logo()
    logger = Init.setup_logger(args.logdir, "eval")
    Init.log_args(logger, args)
    R.load_extern_classes(args.logdir)

    container = RenderContainer(
        args.actor,
        args.epoch,
        args.start,
        args.end,
        logger,
        args.logdir,
        args.gpu_id,
        args.seed,
        args.manager,
    )
    try:
        container.run()
    finally:
        container.close()

def main(args):
    """
    Run an evaluation.

    :param args: Dict[str, Any]
    :return:
    """
    args = DotDict(args)

    Init.print_ascii_logo()
    logger = Init.setup_logger(args.logdir, 'eval')
    Init.log_args(logger, args)
    R.load_extern_classes(args.logdir)

    eval_container = EvalContainer(
        args.actor,
        args.epoch,
        logger,
        args.logdir,
        args.gpu_id,
        args.nb_episode,
        args.start,
        args.end,
        args.seed,
        args.manager,
    )
    try:
        eval_container.run()
    finally:
        eval_container.close()

def __init__(
    self,
    eval_actor,
    epoch_id,
    logger,
    log_id_dir,
    gpu_id,
    nb_episode,
    start,
    end,
    seed,
    manager,
):
    self.log_dir_helper = log_dir_helper = LogDirHelper(log_id_dir)
    self.train_args = train_args = log_dir_helper.load_args()
    self.device = device = self._device_from_gpu_id(gpu_id)
    self.logger = logger

    if epoch_id:
        epoch_ids = [epoch_id]
    else:
        epoch_ids = self.log_dir_helper.epochs()
        epoch_ids = filter(lambda eid: eid >= start, epoch_ids)
        if end != -1.0:
            epoch_ids = filter(lambda eid: eid <= end, epoch_ids)
        epoch_ids = list(epoch_ids)
    self.epoch_ids = epoch_ids

    engine = REGISTRY.lookup_engine(train_args.env)
    env_cls = REGISTRY.lookup_env(train_args.env)
    mgr_cls = REGISTRY.lookup_manager(manager)
    # use the requested manager class rather than hard-coding SubProcEnvManager
    self.env_mgr = env_mgr = mgr_cls.from_args(
        self.train_args, engine, env_cls, seed=seed, nb_env=nb_episode
    )

    if train_args.agent:
        agent = train_args.agent
    else:
        agent = train_args.actor_host
    output_space = REGISTRY.lookup_output_space(agent, env_mgr.action_space)

    actor_cls = REGISTRY.lookup_actor(eval_actor)
    self.actor = actor_cls.from_args(
        actor_cls.prompt(), env_mgr.action_space
    )

    self.network = self._init_network(
        train_args,
        env_mgr.observation_space,
        env_mgr.gpu_preprocessor,
        output_space,
        REGISTRY,
    ).to(device)

def from_defaults(args):
    if args.agent:
        agent_cls = R.lookup_agent(args.agent)
        agent_args = agent_cls.args
    else:
        h = R.lookup_actor(args.actor_host)
        w = R.lookup_actor(args.actor_worker)
        l = R.lookup_learner(args.learner)
        e = R.lookup_exp(args.exp)
        agent_args = {**h.args, **w.args, **l.args, **e.args}

    env_cls = R.lookup_env(args.env)
    rwdnorm_cls = R.lookup_reward_normalizer(args.rwd_norm)
    env_args = env_cls.args
    rwdnorm_args = rwdnorm_cls.args

    if args.custom_network:
        net_args = R.lookup_network(args.custom_network).args
    else:
        net_args = R.lookup_modular_args(args)

    args = DotDict(
        {**args, **agent_args, **env_args, **rwdnorm_args, **net_args}
    )
    return args

def from_prompt(args):
    if args.agent:
        agent_cls = R.lookup_agent(args.agent)
        agent_args = agent_cls.prompt(provided=args)
    else:
        h = R.lookup_actor(args.actor_host)
        w = R.lookup_actor(args.actor_worker)
        l = R.lookup_learner(args.learner)
        e = R.lookup_exp(args.exp)
        agent_args = {
            **h.prompt(args),
            **w.prompt(args),
            **l.prompt(args),
            **e.prompt(args),
        }

    env_cls = R.lookup_env(args.env)
    rwdnorm_cls = R.lookup_reward_normalizer(args.rwd_norm)
    env_args = env_cls.prompt(provided=args)
    rwdnorm_args = rwdnorm_cls.prompt(provided=args)

    if args.custom_network:
        net_args = R.lookup_network(args.custom_network).prompt()
    else:
        net_args = R.prompt_modular_args(args)

    args = DotDict(
        {**args, **agent_args, **env_args, **rwdnorm_args, **net_args}
    )
    return args

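Taken together, from_defaults and from_prompt layer each registered module's hyperparameters on top of the command-line args before a container is built. A minimal usage sketch, assuming parsed is a plain dict of CLI flags; the prompt_hyperparams flag named here is hypothetical:

parsed = DotDict(parsed)               # raw CLI flags (hypothetical variable)
if parsed.prompt_hyperparams:          # hypothetical flag name
    full_args = from_prompt(parsed)    # interactively prompt for per-module args
else:
    full_args = from_defaults(parsed)  # accept each module's registered defaults
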
def main(local_args):
    """
    Run distributed training.

    :param local_args: Dict[str, Any]
    :return:
    """
    log_id_dir = local_args.log_id_dir
    initial_step_count = local_args.initial_step_count

    R.load_extern_classes(log_id_dir)
    logger = Init.setup_logger(log_id_dir, "train{}".format(GLOBAL_RANK))

    helper = LogDirHelper(log_id_dir)
    with open(helper.args_file_path(), "r") as args_file:
        args = DotDict(json.load(args_file))

    if local_args.resume:
        args = DotDict({**args, **vars(local_args)})

    dist.init_process_group(
        backend="nccl",
        init_method=args.init_method,
        world_size=WORLD_SIZE,
        rank=LOCAL_RANK,
    )
    logger.info("Rank {} initialized.".format(GLOBAL_RANK))

    if LOCAL_RANK == 0:
        container = DistribHost(
            args,
            logger,
            log_id_dir,
            initial_step_count,
            LOCAL_RANK,
            GLOBAL_RANK,
            WORLD_SIZE,
        )
    else:
        container = DistribWorker(
            args,
            logger,
            log_id_dir,
            initial_step_count,
            LOCAL_RANK,
            GLOBAL_RANK,
            WORLD_SIZE,
        )

    try:
        container.run()
    finally:
        container.close()

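This entry point relies on module-level LOCAL_RANK, GLOBAL_RANK, and WORLD_SIZE constants. A plausible sketch of how they could be derived from the environment variables exported by the distributed launcher (shown later in this section) for each spawned process; the exact resolution in adept is not shown here, so treat this as an assumption:

import os

# RANK, LOCAL_RANK, and WORLD_SIZE are set in the environment by the
# launcher before it spawns `python -m adept.scripts._distrib`.
LOCAL_RANK = int(os.environ["LOCAL_RANK"])
GLOBAL_RANK = int(os.environ["RANK"])
WORLD_SIZE = int(os.environ["WORLD_SIZE"])
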
def __init__(self, args, log_id_dir, initial_step_count, rank):
    seed = (
        args.seed
        if rank == 0
        else args.seed + args.nb_env * rank
    )
    print('Worker {} using seed {}'.format(rank, seed))

    # load saved registry classes
    REGISTRY.load_extern_classes(log_id_dir)

    # ENV
    engine = REGISTRY.lookup_engine(args.env)
    env_cls = REGISTRY.lookup_env(args.env)
    mgr_cls = REGISTRY.lookup_manager(args.manager)
    env_mgr = mgr_cls.from_args(args, engine, env_cls, seed=seed)

    # NETWORK
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    output_space = REGISTRY.lookup_output_space(
        args.actor_worker, env_mgr.action_space
    )
    if args.custom_network:
        net_cls = REGISTRY.lookup_network(args.custom_network)
    else:
        net_cls = ModularNetwork
    net = net_cls.from_args(
        args,
        env_mgr.observation_space,
        output_space,
        env_mgr.gpu_preprocessor,
        REGISTRY,
    )
    actor_cls = REGISTRY.lookup_actor(args.actor_worker)
    actor = actor_cls.from_args(args, env_mgr.action_space)
    builder = actor_cls.exp_spec_builder(
        env_mgr.observation_space,
        env_mgr.action_space,
        net.internal_space(),
        env_mgr.nb_env,
    )
    exp = REGISTRY.lookup_exp(args.exp).from_args(args, builder)

    self.actor = actor
    self.exp = exp.to(device)
    self.nb_step = args.nb_step
    self.env_mgr = env_mgr
    self.nb_env = args.nb_env
    self.network = net.to(device)
    self.device = device
    self.initial_step_count = initial_step_count

    # TODO: this should be set to eval after some number of training steps
    self.network.train()

    # SETUP state variables for run
    self.step_count = self.initial_step_count
    self.global_step_count = self.initial_step_count
    self.ep_rewards = torch.zeros(self.nb_env)
    self.rank = rank

    self.obs = dtensor_to_dev(self.env_mgr.reset(), self.device)
    self.internals = listd_to_dlist(
        [self.network.new_internals(self.device) for _ in range(self.nb_env)]
    )
    self.start_time = time()
    self._weights_synced = False

def main(args):
    """
    Run local training.

    :param args: Dict[str, Any]
    :return:
    """
    args, log_id_dir, initial_step, logger = Init.main(MODE, args)
    R.save_extern_classes(log_id_dir)

    container = Local(args, logger, log_id_dir, initial_step)

    if args.profile:
        try:
            from pyinstrument import Profiler
        except ImportError:
            raise ImportError(
                'You must install pyinstrument to use profiling.'
            )
        container.nb_step = 10e3
        profiler = Profiler()
        profiler.start()

    try:
        container.run()
    finally:
        if args.profile:
            profiler.stop()
            print(profiler.output_text(unicode=True, color=True))
        container.close()

    if args.eval:
        from adept.scripts.evaluate import main
        eval_args = {
            'log_id_dir': log_id_dir,
            'gpu_id': 0,
            'nb_episode': 30,
        }
        if args.custom_network:
            eval_args['custom_network'] = args.custom_network
        main(eval_args)

def register_learner(learner_cls):
    from adept.registry import REGISTRY
    REGISTRY.register_learner(learner_cls)

def __init__(
    self,
    args,
    log_id_dir,
    initial_step_count,
    rank=0,
):
    # ARGS TO STATE VARS
    self._args = args
    self.nb_learners = args.nb_learners
    self.nb_workers = args.nb_workers
    self.rank = rank
    self.nb_step = args.nb_step
    self.nb_env = args.nb_env
    self.initial_step_count = initial_step_count
    self.epoch_len = args.epoch_len
    self.summary_freq = args.summary_freq
    self.nb_learn_batch = args.nb_learn_batch
    self.rollout_queue_size = args.rollout_queue_size
    # can be none if rank != 0
    self.log_id_dir = log_id_dir

    # load saved registry classes
    REGISTRY.load_extern_classes(log_id_dir)

    # ENV (temporary, only used to read spaces, closed immediately after)
    env_cls = REGISTRY.lookup_env(args.env)
    env = env_cls.from_args(args, 0)
    env_action_space, env_observation_space, env_gpu_preprocessor = (
        env.action_space,
        env.observation_space,
        env.gpu_preprocessor,
    )
    env.close()

    # NETWORK
    torch.manual_seed(args.seed)
    device = torch.device("cuda")  # ray handles gpus
    torch.backends.cudnn.benchmark = True
    output_space = REGISTRY.lookup_output_space(
        args.actor_worker, env_action_space
    )
    if args.custom_network:
        net_cls = REGISTRY.lookup_network(args.custom_network)
    else:
        net_cls = ModularNetwork
    net = net_cls.from_args(
        args,
        env_observation_space,
        output_space,
        env_gpu_preprocessor,
        REGISTRY,
    )
    self.network = net.to(device)
    # TODO: this is a hack, remove once queuer puts rollouts on the correct device
    self.network.device = device
    self.device = device
    self.network.train()

    # OPTIMIZER
    def optim_fn(x):
        return torch.optim.RMSprop(x, lr=args.lr, eps=1e-5, alpha=0.99)

    if args.nb_learners > 1:
        self.optimizer = NCCLOptimizer(
            optim_fn, self.network, self.nb_learners
        )
    else:
        self.optimizer = optim_fn(self.network.parameters())

    # LEARNER / EXP
    rwd_norm = REGISTRY.lookup_reward_normalizer(args.rwd_norm).from_args(args)
    actor_cls = REGISTRY.lookup_actor(args.actor_host)
    # use the cached spaces; the temporary env has already been closed
    builder = actor_cls.exp_spec_builder(
        env_observation_space,
        env_action_space,
        net.internal_space(),
        args.nb_env * args.nb_learn_batch,
    )
    w_builder = REGISTRY.lookup_actor(args.actor_worker).exp_spec_builder(
        env_observation_space,
        env_action_space,
        net.internal_space(),
        args.nb_env,
    )

    actor = actor_cls.from_args(args, env_action_space)
    learner = REGISTRY.lookup_learner(args.learner).from_args(args, rwd_norm)
    exp_cls = REGISTRY.lookup_exp(args.exp)

    self.actor = actor
    self.learner = learner
    self.exp = exp_cls.from_args(args, builder).to(device)

    # Rank 0 setup, load network/optimizer and create SummaryWriter/Saver
    if rank == 0:
        if args.load_network:
            self.network = self.load_network(self.network, args.load_network)
            print('Reloaded network from {}'.format(args.load_network))
        if args.load_optim:
            self.optimizer = self.load_optim(self.optimizer, args.load_optim)
            print('Reloaded optimizer from {}'.format(args.load_optim))

        print('Network parameters: ' + str(self.count_parameters(net)))
        self.summary_writer = SummaryWriter(log_id_dir)
        self.saver = SimpleModelSaver(log_id_dir)

def register_actor(actor_cls):
    from adept.registry import REGISTRY
    REGISTRY.register_actor(actor_cls)

def register_manager(manager_cls):
    from adept.registry import REGISTRY
    REGISTRY.register_manager(manager_cls)

def register_submodule(submod_cls):
    from adept.registry import REGISTRY
    REGISTRY.register_submodule(submod_cls)

def register_network(network_cls):
    from adept.registry import REGISTRY
    REGISTRY.register_network(network_cls)

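The register_* helpers here all follow the same pattern: import the global REGISTRY lazily (which avoids a circular import at module load time) and record a user-supplied class so it can later be looked up by name and reloaded through load_extern_classes. A minimal usage sketch; MyCustomNetwork and MyCustomActor are hypothetical classes assumed to implement the corresponding adept interfaces:

# Register custom classes before args are parsed so REGISTRY lookups by
# name (e.g. a --custom-network value) can resolve them.
register_network(MyCustomNetwork)  # hypothetical custom network class
register_actor(MyCustomActor)      # hypothetical custom actor class
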
def __init__(self, args, logger, log_id_dir, initial_step_count):
    # ENV
    engine = REGISTRY.lookup_engine(args.env)
    env_cls = REGISTRY.lookup_env(args.env)
    mgr_cls = REGISTRY.lookup_manager(args.manager)
    env_mgr = mgr_cls.from_args(args, engine, env_cls)

    # NETWORK
    torch.manual_seed(args.seed)
    if torch.cuda.is_available() and args.gpu_id >= 0:
        device = torch.device("cuda:{}".format(args.gpu_id))
        torch.backends.cudnn.benchmark = True
    else:
        device = torch.device("cpu")
    output_space = REGISTRY.lookup_output_space(
        args.agent, env_mgr.action_space
    )
    if args.custom_network:
        net_cls = REGISTRY.lookup_network(args.custom_network)
    else:
        net_cls = ModularNetwork
    net = net_cls.from_args(
        args,
        env_mgr.gpu_preprocessor.observation_space,
        output_space,
        env_mgr.gpu_preprocessor,
        REGISTRY,
    )
    logger.info("Network parameters: " + str(self.count_parameters(net)))

    def optim_fn(x):
        if args.optim == "RMSprop":
            return torch.optim.RMSprop(x, lr=args.lr, eps=1e-5, alpha=0.99)
        elif args.optim == "Adam":
            return torch.optim.Adam(x, lr=args.lr, eps=1e-5)
        else:
            raise ValueError("Unsupported optimizer: {}".format(args.optim))

    def warmup_schedule(back_step):
        return back_step / args.warmup if back_step < args.warmup else 1.0

    # AGENT
    rwd_norm = REGISTRY.lookup_reward_normalizer(args.rwd_norm).from_args(args)
    agent_cls = REGISTRY.lookup_agent(args.agent)
    builder = agent_cls.exp_spec_builder(
        env_mgr.observation_space,
        env_mgr.action_space,
        net.internal_space(),
        env_mgr.nb_env,
    )
    agent = agent_cls.from_args(args, rwd_norm, env_mgr.action_space, builder)

    self.agent = agent.to(device)
    self.nb_step = args.nb_step
    self.env_mgr = env_mgr
    self.nb_env = args.nb_env
    self.network = net.to(device)
    self.optimizer = optim_fn(self.network.parameters())
    self.scheduler = LambdaLR(self.optimizer, warmup_schedule)
    self.device = device
    self.initial_step_count = initial_step_count
    self.log_id_dir = log_id_dir
    self.epoch_len = args.epoch_len
    self.summary_freq = args.summary_freq
    self.logger = logger
    self.summary_writer = SummaryWriter(log_id_dir)
    self.saver = SimpleModelSaver(log_id_dir)
    self.updater = LocalUpdater(
        self.optimizer, self.network, args.grad_norm_clip
    )

    if args.load_network:
        self.network = self.load_network(self.network, args.load_network)
        logger.info("Reloaded network from {}".format(args.load_network))
    if args.load_optim:
        self.optimizer = self.load_optim(self.optimizer, args.load_optim)
        logger.info("Reloaded optimizer from {}".format(args.load_optim))

    self.network.train()

def __init__(
    self,
    args,
    logger,
    log_id_dir,
    initial_step_count,
    local_rank,
    global_rank,
    world_size,
):
    seed = (
        args.seed
        if global_rank == 0
        else args.seed + args.nb_env * global_rank
    )
    logger.info("Using {} for rank {} seed.".format(seed, global_rank))

    # ENV
    engine = REGISTRY.lookup_engine(args.env)
    env_cls = REGISTRY.lookup_env(args.env)
    mgr_cls = REGISTRY.lookup_manager(args.manager)
    env_mgr = mgr_cls.from_args(args, engine, env_cls, seed=seed)

    # NETWORK
    torch.manual_seed(args.seed)
    device = torch.device("cuda:{}".format(local_rank))
    output_space = REGISTRY.lookup_output_space(
        args.agent, env_mgr.action_space
    )
    if args.custom_network:
        net_cls = REGISTRY.lookup_network(args.custom_network)
    else:
        net_cls = ModularNetwork
    net = net_cls.from_args(
        args,
        env_mgr.observation_space,
        output_space,
        env_mgr.gpu_preprocessor,
        REGISTRY,
    )
    logger.info("Network parameters: " + str(self.count_parameters(net)))

    def optim_fn(x):
        return torch.optim.RMSprop(x, lr=args.lr, eps=1e-5, alpha=0.99)

    # AGENT
    rwd_norm = REGISTRY.lookup_reward_normalizer(args.rwd_norm).from_args(args)
    agent_cls = REGISTRY.lookup_agent(args.agent)
    builder = agent_cls.exp_spec_builder(
        env_mgr.observation_space,
        env_mgr.action_space,
        net.internal_space(),
        env_mgr.nb_env,
    )
    agent = agent_cls.from_args(args, rwd_norm, env_mgr.action_space, builder)

    self.agent = agent
    self.nb_step = args.nb_step
    self.env_mgr = env_mgr
    self.nb_env = args.nb_env
    self.network = net.to(device)
    self.optimizer = optim_fn(self.network.parameters())
    self.device = device
    self.initial_step_count = initial_step_count
    self.log_id_dir = log_id_dir
    self.epoch_len = args.epoch_len
    self.summary_freq = args.summary_freq
    self.logger = logger
    self.summary_writer = SummaryWriter(
        os.path.join(log_id_dir, "rank{}".format(global_rank))
    )
    self.saver = SimpleModelSaver(log_id_dir)
    self.local_rank = local_rank
    self.global_rank = global_rank
    self.world_size = world_size
    self.updater = DistribUpdater(
        self.optimizer,
        self.network,
        args.grad_norm_clip,
        world_size,
        not args.no_divide,
    )

    if args.load_network:
        self.network = self.load_network(self.network, args.load_network)
        logger.info("Reloaded network from {}".format(args.load_network))
    if args.load_optim:
        self.optimizer = self.load_optim(self.optimizer, args.load_optim)
        logger.info("Reloaded optimizer from {}".format(args.load_optim))

    self.network.train()

def register_exp(exp_cls):
    from adept.registry import REGISTRY
    REGISTRY.register_exp(exp_cls)

def register_agent(agent_cls):
    from adept.registry import REGISTRY
    REGISTRY.register_agent(agent_cls)

def __init__(
    self,
    args,
    logger,
    log_id_dir,
    initial_step_count,
    local_rank,
    global_rank,
    world_size,
):
    seed = (
        args.seed
        if global_rank == 0
        else args.seed + args.nb_env * global_rank
    )
    logger.info('Using {} for rank {} seed.'.format(seed, global_rank))

    # ENV
    engine = REGISTRY.lookup_engine(args.env)
    env_cls = REGISTRY.lookup_env(args.env)
    env_mgr = SubProcEnvManager.from_args(args, engine, env_cls, seed=seed)

    # NETWORK
    torch.manual_seed(args.seed)
    device = torch.device("cuda:{}".format(local_rank))
    output_space = REGISTRY.lookup_output_space(
        args.agent, env_mgr.action_space
    )
    if args.custom_network:
        net_cls = REGISTRY.lookup_network(args.custom_network)
    else:
        net_cls = ModularNetwork
    net = net_cls.from_args(
        args,
        env_mgr.observation_space,
        output_space,
        env_mgr.gpu_preprocessor,
        REGISTRY,
    )

    def optim_fn(x):
        return torch.optim.RMSprop(x, lr=args.lr, eps=1e-5, alpha=0.99)

    # AGENT
    rwd_norm = REGISTRY.lookup_reward_normalizer(args.rwd_norm).from_args(args)
    agent_cls = REGISTRY.lookup_agent(args.agent)
    builder = agent_cls.exp_spec_builder(
        env_mgr.observation_space,
        env_mgr.action_space,
        net.internal_space(),
        env_mgr.nb_env,
    )
    agent = agent_cls.from_args(args, rwd_norm, env_mgr.action_space, builder)

    self.agent = agent
    self.nb_step = args.nb_step
    self.env_mgr = env_mgr
    self.nb_env = args.nb_env
    self.network = net.to(device)
    self.optimizer = optim_fn(self.network.parameters())
    self.device = device
    self.initial_step_count = initial_step_count
    self.log_id_dir = log_id_dir
    self.epoch_len = args.epoch_len
    self.summary_freq = args.summary_freq
    self.logger = logger
    self.local_rank = local_rank
    self.global_rank = global_rank
    self.world_size = world_size

    if args.load_network:
        self.network = self.load_network(self.network, args.load_network)
        logger.info('Reloaded network from {}'.format(args.load_network))
    if args.load_optim:
        self.optimizer = self.load_optim(self.optimizer, args.load_optim)
        logger.info('Reloaded optimizer from {}'.format(args.load_optim))

    self.network.train()

def main(args):
    """
    Run actorlearner training.

    :param args: Dict[str, Any]
    :return:
    """
    args, log_id_dir, initial_step, logger = Init.main(MODE, args)
    R.save_extern_classes(log_id_dir)

    # start ray
    if args.ray_addr is not None:
        ray.init(address=args.ray_addr)
        logger.info(
            'Using Ray on a cluster. Head node address: {}'.format(
                args.ray_addr
            )
        )
    else:
        logger.info('Using Ray on a single machine.')
        ray.init()

    # create a main learner which logs summaries and saves weights
    main_learner_cls = ActorLearnerHost.as_remote(
        num_cpus=args.learner_cpu_alloc,
        num_gpus=args.learner_gpu_alloc,
    )
    main_learner = main_learner_cls.remote(
        args, log_id_dir, initial_step, rank=0
    )

    # if multiple learners setup nccl
    if args.nb_learners > 1:
        # create N peer learners
        peer_learners = []
        for p_ind in range(args.nb_learners - 1):
            remote_cls = ActorLearnerHost.as_remote(
                num_cpus=args.learner_cpu_alloc,
                num_gpus=args.learner_gpu_alloc,
            )
            # init
            remote = remote_cls.remote(
                args, log_id_dir, initial_step, rank=p_ind + 1
            )
            peer_learners.append(remote)

        # figure out main learner node ip
        nccl_addr, nccl_ip, nccl_port = ray.get(
            main_learner._rank0_nccl_port_init.remote()
        )

        # setup all nccls
        nccl_inits = [
            main_learner._nccl_init.remote(nccl_addr, nccl_ip, nccl_port)
        ]
        nccl_inits.extend(
            [
                p._nccl_init.remote(nccl_addr, nccl_ip, nccl_port)
                for p in peer_learners
            ]
        )
        # wait for all
        ray.get(nccl_inits)
        logger.info('NCCL initialized')

        # have all sync parameters
        [f._sync_peer_parameters.remote() for f in peer_learners]
        main_learner._sync_peer_parameters.remote()
    # else just 1 learner
    else:
        peer_learners = []

    # create workers
    workers = [
        ActorLearnerWorker.as_remote(
            num_cpus=args.worker_cpu_alloc,
            num_gpus=args.worker_gpu_alloc,
        ).remote(args, log_id_dir, initial_step, w_ind)
        for w_ind in range(args.nb_workers)
    ]

    # synchronize worker variables
    ray.get(
        main_learner.synchronize_worker_parameters.remote(
            workers, initial_step, blocking=True
        )
    )

    try:
        # startup the run method of all containers
        runs = [main_learner.run.remote(workers, args.profile)]
        runs.extend([f.run.remote(workers) for f in peer_learners])
        done_training = ray.wait(runs)
    finally:
        closes = [main_learner.close.remote()]
        closes.extend([f.close.remote() for f in peer_learners])
        done_closing = ray.wait(closes)

    if args.eval:
        from adept.scripts.evaluate import main
        eval_args = {
            'log_id_dir': log_id_dir,
            'gpu_id': 0,
            'nb_episode': 30,
        }
        if args.custom_network:
            eval_args['custom_network'] = args.custom_network
        main(eval_args)

def register_env(env_cls):
    from adept.registry import REGISTRY
    REGISTRY.register_env(env_cls)

def main(args):
    """
    Run distributed training.

    :param args: Dict[str, Any]
    :return:
    """
    args = DotDict(args)

    dist_world_size = args.nb_proc * args.nb_node

    current_env = os.environ.copy()
    current_env["MASTER_ADDR"] = args.master_addr
    current_env["MASTER_PORT"] = str(args.master_port)
    current_env["WORLD_SIZE"] = str(dist_world_size)

    args, log_id_dir, initial_step, logger = Init.main(MODE, args)
    R.save_extern_classes(log_id_dir)

    processes = []
    for local_rank in range(0, args.nb_proc):
        # each process's rank
        dist_rank = args.nb_proc * args.node_rank + local_rank
        current_env["RANK"] = str(dist_rank)
        current_env["LOCAL_RANK"] = str(local_rank)

        # spawn the processes
        if not args.resume:
            cmd = [
                sys.executable,
                "-u",
                "-m",
                "adept.scripts._distrib",
                "--log-id-dir={}".format(log_id_dir),
            ]
        else:
            cmd = [
                sys.executable,
                "-u",
                "-m",
                "adept.scripts._distrib",
                "--log-id-dir={}".format(log_id_dir),
                "--resume={}".format(True),
                "--load-network={}".format(args.load_network),
                "--load-optim={}".format(args.load_optim),
                "--initial-step-count={}".format(initial_step),
                "--init-method={}".format(args.init_method),
            ]
        if args.custom_network:
            cmd += ["--custom-network", args.custom_network]

        process = subprocess.Popen(cmd, env=current_env)
        processes.append(process)

    for process in processes:
        process.wait()

    if args.eval:
        from adept.scripts.evaluate import main
        eval_args = {
            "log_id_dir": log_id_dir,
            "gpu_id": 0,
            "nb_episode": 30,
        }
        if args.custom_network:
            eval_args["custom_network"] = args.custom_network
        main(eval_args)

def register_reward_norm(rwd_norm_cls):
    from adept.registry import REGISTRY
    REGISTRY.register_reward_normalizer(rwd_norm_cls)