def __init__(self, model_aup, model_aux, env_type, z_dim, aup_train_steps, **kwargs):
    load_kwargs(self, kwargs)
    assert self.training_envs is not None

    self.aup_train_steps = aup_train_steps
    # Reuse the pre-trained auxiliary models and their optimizers.
    for m in model_aux.model_aux:
        m.to(self.compute_device)
    self.model_aux = model_aux.model_aux
    self.optimizer_aux = model_aux.optimizer_aux

    self.model_aup = model_aup.to(self.compute_device)
    self.optimizer_aup = optim.Adam(
        self.model_aup.parameters(), lr=self.learning_rate_aup)
    checkpointing.load_checkpoint(self.logdir, self, aup=True)

    self.exp = env_type

    # AUP-specific parameters
    self.z_dim = z_dim
    self.use_scale = False
    # The schedule length for the AUP penalty coefficient (lambda) depends on
    # the environment type.
    if env_type == 'append-still':
        n_steps = 5e6
    else:
        n_steps = 4e6
    self.lamb_schedule = LinearSchedule(n_steps, initial_p=1e-3, final_p=1e-1)
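# Hedged sketch (not the project's actual implementation): LinearSchedule is
# assumed to interpolate a coefficient linearly from initial_p to final_p over
# n_steps, as its constructor arguments above suggest. The `value` method name
# is an assumption for illustration.
class LinearSchedule:
    def __init__(self, schedule_steps, initial_p, final_p):
        self.schedule_steps = schedule_steps
        self.initial_p = initial_p
        self.final_p = final_p

    def value(self, step):
        # Fraction of the schedule completed, clipped to [0, 1].
        frac = min(max(step / self.schedule_steps, 0.0), 1.0)
        return self.initial_p + frac * (self.final_p - self.initial_p)

# Example: lamb rises from 1e-3 toward 1e-1 as training progresses, e.g.
# lamb = self.lamb_schedule.value(self.num_steps)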
def __init__(self, model, **kwargs):
    load_kwargs(self, kwargs)
    assert self.training_envs is not None

    self.model = model.to(self.compute_device)
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
    self.load_checkpoint()
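# Hedged sketch of the `load_kwargs` helper used throughout these constructors.
# The real utility may validate keys against declared class attributes; this
# minimal assumed version simply copies keyword arguments onto the instance.
def load_kwargs(obj, kwargs):
    for key, value in kwargs.items():
        # e.g. training_envs, compute_device, learning_rate, logdir, ...
        setattr(obj, key, value)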
def __init__(self, *levels, logger, curriculum_params={}, **kwargs):
    super().__init__(*levels, repeat_levels=True, **kwargs)
    self.logger = logger
    self.curriculum_stage = 0
    self.max_stage = len(levels) - 1
    self.curr_currently_playing = 0
    self.just_advanced = False
    # Map each level to its history of performance scores.
    self.perf_records = defaultdict(lambda: [0.0])
    self.best = defaultdict(lambda: 0)
    load_kwargs(self, curriculum_params)
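# Illustrative sketch only (not the project's actual advancement rule): one way
# the per-level performance history and `curriculum_stage` set up above could be
# used. The names `record_performance` and `advance_threshold` are assumptions.
def record_performance(self, level, score, advance_threshold=0.5):
    self.perf_records[level].append(score)
    self.best[level] = max(self.best[level], score)
    recent = self.perf_records[level][-10:]
    # Advance to the next stage once recent performance clears the threshold.
    if sum(recent) / len(recent) >= advance_threshold and self.curriculum_stage < self.max_stage:
        self.curriculum_stage += 1
        self.just_advanced = True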
def __init__(self, training_model, target_model, **kwargs):
    load_kwargs(self, kwargs)
    assert self.training_envs is not None

    self.training_model = training_model.to(self.compute_device)
    self.target_model = target_model.to(self.compute_device)
    self.optimizer = optim.Adam(
        self.training_model.parameters(), lr=self.learning_rate)
    self.replay_buffer = ReplayBuffer(self.replay_size)

    checkpointing.load_checkpoint(self.logdir, self)
    print('loaded to {} steps'.format(self.num_steps))
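# Hedged sketch of a uniform replay buffer with the interface implied above
# (constructed with a capacity `replay_size`); the project's ReplayBuffer may
# differ in storage layout and method names.
import random
from collections import deque

class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, *transition):
        # Store one (obs, action, reward, next_obs, done) tuple.
        self.buffer.append(transition)

    def sample(self, batch_size):
        # Uniformly sample a mini-batch of stored transitions.
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)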
def __init__(self, training_model, target_model, **kwargs):
    load_kwargs(self, kwargs)
    assert self.training_envs is not None

    self.training_model = training_model.to(self.compute_device)
    self.target_model = target_model.to(self.compute_device)
    self.optimizer = optim.Adam(
        self.training_model.parameters(), lr=self.learning_rate)
    self.replay_buffer = MultistepReplayBuffer(
        self.replay_size, len(self.training_envs),
        self.multi_step_learning, self.gamma)

    self.load_checkpoint()
    self.epsilon = self.epsilon_schedule(self.num_steps)
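# Hedged sketch of the n-step return that a multi-step buffer like the one above
# typically aggregates before a transition is stored: R = sum_k gamma^k * r_{t+k}.
# This helper is illustrative and is not the project's MultistepReplayBuffer API.
def n_step_return(rewards, gamma):
    ret = 0.0
    for k, r in enumerate(rewards):
        ret += (gamma ** k) * r
    return ret

# Example: n_step_return([1.0, 0.0, 0.5], gamma=0.97) == 1.0 + 0.97**2 * 0.5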
def __init__(self, training_model, target_model, **kwargs):
    load_kwargs(self, kwargs)
    assert self.training_envs is not None

    self.training_model = training_model.to(self.compute_device)
    self.target_model = target_model.to(self.compute_device)
    self.optimizer = optim.Adam(
        self.training_model.parameters(), lr=self.learning_rate)
    self.replay_buffer = ReplayBuffer(self.replay_size)
    # Per-agent window of the last `multi_step_learning` (obs, action, reward) steps.
    self.agent_trajectories = defaultdict(lambda: np.empty(
        self.multi_step_learning,
        dtype=[('obs', object), ('action', int), ('reward', float)]))

    self.load_checkpoint()
    self.epsilon = self.epsilon_schedule(self.num_steps)
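# Hedged sketch: how the per-agent structured array created above might be used
# as a sliding window of recent steps. The function name and the shift-then-
# overwrite scheme are assumptions for illustration, not the project's code.
import numpy as np

def push_step(trajectory, obs, action, reward):
    # Shift the window left by one and write the newest step into the last slot.
    trajectory[:-1] = trajectory[1:]
    trajectory[-1] = (obs, action, reward)
    return trajectory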
def __init__(self, model_aux, env_type, z_dim, n_rfn, buf_size, vae_epochs,
             random_projection, aux_train_steps, **kwargs):
    load_kwargs(self, kwargs)
    assert self.training_envs is not None

    # One auxiliary model/optimizer pair per random reward function.
    self.model_aux = [
        model_aux.to(self.compute_device) for _ in range(n_rfn)
    ]
    self.optimizer_aux = [
        optim.Adam(m.parameters(), lr=self.learning_rate_aux)
        for m in self.model_aux
    ]
    self.aux_train_steps = aux_train_steps

    checkpointing.load_checkpoint(self.logdir, self, aux=True)
    print(self.model_aux)
    self.exp = env_type

    # Skip (re)training the state encoder if this run is already past the
    # auxiliary training phase.
    skip_vae_training = self.num_steps >= (aux_train_steps - 1)
    print('loaded to {} steps'.format(self.num_steps))
    print('Final train step is {} steps'.format(aux_train_steps))

    self.z_dim = z_dim
    self.state_encoder = None
    self.n_random_reward_fns = n_rfn
    self.is_random_projection = random_projection
    self.random_buffer_size = buf_size
    self.train_encoder_epochs = vae_epochs

    if not self.is_random_projection and not skip_vae_training:
        self.state_encoder = [
            self.train_state_encoder(envs=self.training_envs)
            for _ in range(n_rfn)
        ]
    if random_projection:
        self.state_encoder = [None for _ in range(n_rfn)]

    for model in self.model_aux:
        model.register_reward_function(
            dim=self.z_dim,
            projection=self.is_random_projection,
            device=self.compute_device)
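# Hedged sketch of what an auxiliary random reward function could look like: a
# fixed random linear projection applied to either a random-projection feature
# or a VAE latent of dimension z_dim. This mirrors the register_reward_function
# call above but is an assumption for illustration, not the project's code.
import torch

def make_random_reward_fn(dim, device):
    w = torch.randn(dim, device=device)  # fixed random direction, drawn once
    def reward_fn(z):
        # z: tensor of shape (batch, dim); the reward is its projection onto w.
        return z @ w
    return reward_fn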