def __init__(self, args, log_id_dir, initial_step_count, rank):
    """Construct a rollout worker: environment manager, network, actor,
    and experience cache, then reset all state needed by the run loop."""
    # Rank 0 keeps the base seed; every other rank is offset by nb_env per
    # rank so no two workers ever share an environment seed.
    seed = args.seed if rank == 0 else args.seed + args.nb_env * rank
    print('Worker {} using seed {}'.format(rank, seed))

    # load saved registry classes
    REGISTRY.load_extern_classes(log_id_dir)

    # ENV
    env_engine = REGISTRY.lookup_engine(args.env)
    env_cls = REGISTRY.lookup_env(args.env)
    manager_cls = REGISTRY.lookup_manager(args.manager)
    env_mgr = manager_cls.from_args(args, env_engine, env_cls, seed=seed)

    # NETWORK — note the base seed is used here (not the rank-offset one),
    # so network initialization is identical on every worker.
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    output_space = REGISTRY.lookup_output_space(
        args.actor_worker, env_mgr.action_space
    )
    net_cls = (
        REGISTRY.lookup_network(args.custom_network)
        if args.custom_network
        else ModularNetwork
    )
    net = net_cls.from_args(
        args,
        env_mgr.observation_space,
        output_space,
        env_mgr.gpu_preprocessor,
        REGISTRY,
    )

    # ACTOR and EXPERIENCE
    actor_cls = REGISTRY.lookup_actor(args.actor_worker)
    actor = actor_cls.from_args(args, env_mgr.action_space)
    spec_builder = actor_cls.exp_spec_builder(
        env_mgr.observation_space,
        env_mgr.action_space,
        net.internal_space(),
        env_mgr.nb_env,
    )
    exp = REGISTRY.lookup_exp(args.exp).from_args(args, spec_builder)

    self.actor = actor
    self.exp = exp.to(device)
    self.nb_step = args.nb_step
    self.env_mgr = env_mgr
    self.nb_env = args.nb_env
    self.network = net.to(device)
    self.device = device
    self.initial_step_count = initial_step_count

    # TODO: this should be set to eval after some number of training steps
    self.network.train()

    # SETUP state variables for run
    self.step_count = self.initial_step_count
    self.global_step_count = self.initial_step_count
    self.ep_rewards = torch.zeros(self.nb_env)
    self.rank = rank

    self.obs = dtensor_to_dev(self.env_mgr.reset(), self.device)
    self.internals = listd_to_dlist(
        [self.network.new_internals(self.device) for _ in range(self.nb_env)]
    )
    self.start_time = time()
    self._weights_synced = False
def __init__(
    self,
    eval_actor,
    epoch_id,
    logger,
    log_id_dir,
    gpu_id,
    nb_episode,
    start,
    end,
    seed,
    manager
):
    """Construct an evaluator over the checkpoints of a finished run.

    Args:
        eval_actor: Registry id of the actor class used for evaluation.
        epoch_id: Specific epoch to evaluate; falsy means all epochs.
        logger: Logger instance.
        log_id_dir: Directory of the training run to evaluate.
        gpu_id: GPU ordinal, mapped to a device by _device_from_gpu_id.
        nb_episode: Number of parallel episodes (one env per episode).
        start: Lowest epoch id to include.
        end: Highest epoch id to include; -1. disables the upper bound.
        seed: Environment seed.
        manager: Registry id of the environment manager class.
    """
    self.log_dir_helper = log_dir_helper = LogDirHelper(log_id_dir)
    self.train_args = train_args = log_dir_helper.load_args()
    self.device = device = self._device_from_gpu_id(gpu_id)
    self.logger = logger

    # Either a single requested epoch, or all epochs filtered to
    # start <= eid (<= end when an upper bound was given).
    if epoch_id:
        epoch_ids = [epoch_id]
    else:
        epoch_ids = self.log_dir_helper.epochs()
        epoch_ids = filter(lambda eid: eid >= start, epoch_ids)
        if end != -1.:
            epoch_ids = filter(lambda eid: eid <= end, epoch_ids)
        epoch_ids = list(epoch_ids)
    self.epoch_ids = epoch_ids

    engine = REGISTRY.lookup_engine(train_args.env)
    env_cls = REGISTRY.lookup_env(train_args.env)
    mgr_cls = REGISTRY.lookup_manager(manager)
    # FIX: previously hard-coded SubProcEnvManager.from_args(...), which
    # silently ignored the manager class looked up from the `manager`
    # argument just above. Use the requested manager instead.
    self.env_mgr = env_mgr = mgr_cls.from_args(
        self.train_args, engine, env_cls, seed=seed, nb_env=nb_episode
    )
    # Output space keys off the trained agent (or actor_host fallback).
    if train_args.agent:
        agent = train_args.agent
    else:
        agent = train_args.actor_host
    output_space = REGISTRY.lookup_output_space(
        agent, env_mgr.action_space
    )
    actor_cls = REGISTRY.lookup_actor(eval_actor)
    # NOTE(review): actor args come from actor_cls.prompt(), not from
    # train_args — presumably interactive configuration; confirm.
    self.actor = actor_cls.from_args(
        actor_cls.prompt(), env_mgr.action_space
    )

    self.network = self._init_network(
        train_args,
        env_mgr.observation_space,
        env_mgr.gpu_preprocessor,
        output_space,
        REGISTRY
    ).to(device)
def __init__(
    self,
    args,
    logger,
    log_id_dir,
    initial_step_count,
    local_rank,
    global_rank,
    world_size,
):
    """Construct one rank of a distributed trainer: env manager, network,
    optimizer, agent, summary/checkpoint writers, and gradient updater."""
    # Global rank 0 keeps the base seed; other ranks offset by nb_env per
    # rank so environment seeds never collide across the job.
    if global_rank == 0:
        seed = args.seed
    else:
        seed = args.seed + args.nb_env * global_rank
    logger.info("Using {} for rank {} seed.".format(seed, global_rank))

    # ENV
    env_engine = REGISTRY.lookup_engine(args.env)
    env_cls = REGISTRY.lookup_env(args.env)
    manager_cls = REGISTRY.lookup_manager(args.manager)
    env_mgr = manager_cls.from_args(args, env_engine, env_cls, seed=seed)

    # NETWORK — base seed here, so weights start identical on every rank.
    torch.manual_seed(args.seed)
    device = torch.device("cuda:{}".format(local_rank))
    output_space = REGISTRY.lookup_output_space(
        args.agent, env_mgr.action_space
    )
    net_cls = (
        REGISTRY.lookup_network(args.custom_network)
        if args.custom_network
        else ModularNetwork
    )
    net = net_cls.from_args(
        args,
        env_mgr.observation_space,
        output_space,
        env_mgr.gpu_preprocessor,
        REGISTRY,
    )
    logger.info("Network parameters: " + str(self.count_parameters(net)))

    def optim_fn(params):
        return torch.optim.RMSprop(params, lr=args.lr, eps=1e-5, alpha=0.99)

    # AGENT
    rwd_norm = REGISTRY.lookup_reward_normalizer(args.rwd_norm).from_args(
        args
    )
    agent_cls = REGISTRY.lookup_agent(args.agent)
    spec_builder = agent_cls.exp_spec_builder(
        env_mgr.observation_space,
        env_mgr.action_space,
        net.internal_space(),
        env_mgr.nb_env,
    )
    agent = agent_cls.from_args(
        args, rwd_norm, env_mgr.action_space, spec_builder
    )

    self.agent = agent
    self.nb_step = args.nb_step
    self.env_mgr = env_mgr
    self.nb_env = args.nb_env
    self.network = net.to(device)
    self.optimizer = optim_fn(self.network.parameters())
    self.device = device
    self.initial_step_count = initial_step_count
    self.log_id_dir = log_id_dir
    self.epoch_len = args.epoch_len
    self.summary_freq = args.summary_freq
    self.logger = logger
    # Each rank writes its summaries into its own subdirectory.
    self.summary_writer = SummaryWriter(
        os.path.join(log_id_dir, "rank{}".format(global_rank))
    )
    self.saver = SimpleModelSaver(log_id_dir)
    self.local_rank = local_rank
    self.global_rank = global_rank
    self.world_size = world_size
    self.updater = DistribUpdater(
        self.optimizer,
        self.network,
        args.grad_norm_clip,
        world_size,
        not args.no_divide,
    )

    if args.load_network:
        self.network = self.load_network(self.network, args.load_network)
        logger.info("Reloaded network from {}".format(args.load_network))
    if args.load_optim:
        self.optimizer = self.load_optim(self.optimizer, args.load_optim)
        logger.info("Reloaded optimizer from {}".format(args.load_optim))

    self.network.train()
def __init__(self, args, logger, log_id_dir, initial_step_count):
    """Construct a single-node trainer: env manager, network, optimizer
    with linear LR warmup, agent, and bookkeeping (summaries, checkpoints).

    Args:
        args: Parsed run configuration (env, agent, optim, lr, etc.).
        logger: Logger instance.
        log_id_dir: Directory for summaries and checkpoints.
        initial_step_count: Environment step count to resume from.

    Raises:
        ValueError: If args.optim is neither "RMSprop" nor "Adam".
    """
    # ENV
    engine = REGISTRY.lookup_engine(args.env)
    env_cls = REGISTRY.lookup_env(args.env)
    mgr_cls = REGISTRY.lookup_manager(args.manager)
    env_mgr = mgr_cls.from_args(args, engine, env_cls)

    # NETWORK
    torch.manual_seed(args.seed)
    if torch.cuda.is_available() and args.gpu_id >= 0:
        device = torch.device("cuda:{}".format(args.gpu_id))
        # Fixed input sizes benefit from cudnn autotuning.
        torch.backends.cudnn.benchmark = True
    else:
        device = torch.device("cpu")
    output_space = REGISTRY.lookup_output_space(args.agent, env_mgr.action_space)
    if args.custom_network:
        net_cls = REGISTRY.lookup_network(args.custom_network)
    else:
        net_cls = ModularNetwork
    # NOTE(review): observation space is taken from the gpu_preprocessor
    # here, unlike the worker/distrib constructors which pass
    # env_mgr.observation_space directly — confirm this is intentional.
    net = net_cls.from_args(
        args,
        env_mgr.gpu_preprocessor.observation_space,
        output_space,
        env_mgr.gpu_preprocessor,
        REGISTRY,
    )
    logger.info("Network parameters: " + str(self.count_parameters(net)))

    def optim_fn(x):
        """Build the optimizer selected by args.optim."""
        if args.optim == "RMSprop":
            return torch.optim.RMSprop(x, lr=args.lr, eps=1e-5, alpha=0.99)
        elif args.optim == "Adam":
            return torch.optim.Adam(x, lr=args.lr, eps=1e-5)
        # FIX: previously fell through and returned None for any other
        # value, which surfaced later as a confusing AttributeError inside
        # LambdaLR. Fail loudly at the root cause instead.
        raise ValueError("Unsupported optimizer: {}".format(args.optim))

    def warmup_schedule(back_step):
        """Linear LR warmup over the first args.warmup backward steps."""
        return back_step / args.warmup if back_step < args.warmup else 1.0

    # AGENT
    rwd_norm = REGISTRY.lookup_reward_normalizer(
        args.rwd_norm).from_args(args)
    agent_cls = REGISTRY.lookup_agent(args.agent)
    builder = agent_cls.exp_spec_builder(
        env_mgr.observation_space,
        env_mgr.action_space,
        net.internal_space(),
        env_mgr.nb_env,
    )
    agent = agent_cls.from_args(args, rwd_norm, env_mgr.action_space, builder)

    self.agent = agent.to(device)
    self.nb_step = args.nb_step
    self.env_mgr = env_mgr
    self.nb_env = args.nb_env
    self.network = net.to(device)
    self.optimizer = optim_fn(self.network.parameters())
    self.scheduler = LambdaLR(self.optimizer, warmup_schedule)
    self.device = device
    self.initial_step_count = initial_step_count
    self.log_id_dir = log_id_dir
    self.epoch_len = args.epoch_len
    self.summary_freq = args.summary_freq
    self.logger = logger
    self.summary_writer = SummaryWriter(log_id_dir)
    self.saver = SimpleModelSaver(log_id_dir)
    self.updater = LocalUpdater(self.optimizer, self.network, args.grad_norm_clip)

    # Optionally resume network/optimizer state from checkpoints.
    if args.load_network:
        self.network = self.load_network(self.network, args.load_network)
        logger.info("Reloaded network from {}".format(args.load_network))
    if args.load_optim:
        self.optimizer = self.load_optim(self.optimizer, args.load_optim)
        logger.info("Reloaded optimizer from {}".format(args.load_optim))

    # Keep the network in training mode for the run loop.
    self.network.train()