def __init__(self, config=default()):
    super().__init__(config)
    self.experience = namedtuple(
        "Experience",
        field_names=["state", "action", "reward", "next_state", "done"])
    logger.info(describe(self))
    self.clear()
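# Not shown in this snippet: how Experience tuples are stored and sampled.
# A minimal sketch of the usual replay-buffer pattern, assuming a deque-backed
# memory; `buffer_size` and `batch_size` are illustrative, not from the source.
import random
from collections import deque, namedtuple

Experience = namedtuple(
    "Experience", field_names=["state", "action", "reward", "next_state", "done"])

class MiniReplayBuffer:
    def __init__(self, buffer_size=int(1e5), batch_size=64):
        self.memory = deque(maxlen=buffer_size)  # oldest transitions are evicted
        self.batch_size = batch_size

    def add(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self):
        # Uniform random sampling decorrelates consecutive transitions.
        return random.sample(self.memory, k=self.batch_size)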
def __init__(self, config, input_state_size, output_action_size,
             fc1_units=400, fc2_units=300):
    super(DDPG_Networks, self).__init__(config, input_state_size, output_action_size)
    # Local networks are trained directly; target networks track them slowly
    # to give the critic a stable bootstrapping target.
    self.actor_local = LowDimActor(config, self.input_state_size,
                                   self.output_action_size, fc1_units, fc2_units)
    self.actor_target = LowDimActor(config, self.input_state_size,
                                    self.output_action_size, fc1_units, fc2_units)
    self.critic_local = LowDimCritic(config, self.input_state_size,
                                     self.output_action_size, fc1_units, fc2_units)
    self.critic_target = LowDimCritic(config, self.input_state_size,
                                      self.output_action_size, fc1_units, fc2_units)
    logger.info('DDPG_Networks is initialized!')
    logger.info(utils.describe(self))
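# The local/target pairs above are typically reconciled with DDPG's soft
# (polyak) target update. The update itself is not in this snippet; a minimal
# sketch, assuming `tau` comes from config:
import torch.nn as nn

def soft_update(local_model: nn.Module, target_model: nn.Module, tau: float = 1e-3):
    # theta_target <- tau * theta_local + (1 - tau) * theta_target
    for target_param, local_param in zip(target_model.parameters(),
                                         local_model.parameters()):
        target_param.data.copy_(
            tau * local_param.data + (1.0 - tau) * target_param.data)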
def _test(self, interim_test: bool = False):
    """Common test routine."""
    if interim_test:
        test_num = self._configs.glob.interim_test_nums
    else:
        test_num = self._configs.glob.num_episodes

    for i_episode in range(test_num):
        state = self.env.reset()
        done = False
        score = 0
        step = 0

        while not done:
            self.env.render()
            action = self.select_action(state)
            next_state, reward, done = self.step(action)

            state = next_state
            score += reward
            step += 1

        logger.info(
            "test %d\tstep: %d\ttotal score: %d" % (i_episode, step, score)
        )
        logger.log_scalar("Test score", score, i_episode)
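# `self.step` above returns a 3-tuple rather than gym's raw
# (obs, reward, done, info). The wrapper is not shown in the source; a
# plausible sketch consistent with that call site:
def step(self, action):
    next_state, reward, done, _ = self.env.step(action)  # drop the info dict
    return next_state, reward, done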
def __init__(self, config):
    super(GymEnv, self).__init__(config)
    self._env = gym.make(self.name)
    self._set_attr_from_u_env(self._env)
    self._env.seed(self.seed)
    logger.info(utils.describe(self))
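# `env.seed(...)` is the classic gym (< 0.26) API. Under newer gym/gymnasium
# releases, seeding moved into reset; a hedged sketch of the equivalent calls
# if this wrapper is ever ported:
state, info = self._env.reset(seed=self.seed)
self._env.action_space.seed(self.seed)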
def save_params(self, params: dict, n_episode: int):
    """Save model and optimizer parameters."""
    if not os.path.exists("./saved"):
        os.mkdir("./saved")

    save_name = self.env_name + "_" + self.hparams.name
    path = os.path.join("./saved", save_name + "_ep_" + str(n_episode) + ".pt")
    torch.save(params, path)
    logger.info("Saved the model and optimizer to %s", path)
def load_params(self, path: str):
    """Load model and optimizer parameters."""
    if not os.path.exists(path):
        logger.fatal("the input path does not exist -> %s", path)
        return

    params = torch.load(path)
    self.actor.load_state_dict(params["actor_state_dict"])
    self.actor_target.load_state_dict(params["actor_target_state_dict"])
    self.critic.load_state_dict(params["critic_state_dict"])
    self.critic_target.load_state_dict(params["critic_target_state_dict"])
    self.actor_optimizer.load_state_dict(params["actor_optim_state_dict"])
    self.critic_optimizer.load_state_dict(params["critic_optim_state_dict"])
    logger.info("Loaded the model and optimizer from %s", path)
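# load_params reads six fixed keys, so save_params must receive a dict with the
# same layout. A sketch of the expected checkpoint structure, inferred from the
# keys above (the assembly site itself is not shown in the source):
params = {
    "actor_state_dict": self.actor.state_dict(),
    "actor_target_state_dict": self.actor_target.state_dict(),
    "critic_state_dict": self.critic.state_dict(),
    "critic_target_state_dict": self.critic_target.state_dict(),
    "actor_optim_state_dict": self.actor_optimizer.state_dict(),
    "critic_optim_state_dict": self.critic_optimizer.state_dict(),
}
self.save_params(params, n_episode=self.i_episode)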
def __init__(self, env: BaseEnv, models: tuple, optims: tuple,
             noise: OUNoise, configs: Configs = default()):
    super(DDPGAgent, self).__init__(env, configs)
    self.actor, self.actor_target, self.critic, self.critic_target = models
    self.actor_optimizer, self.critic_optimizer = optims
    self.curr_state = np.zeros((1,))
    self.noise = noise

    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0

    # Resume from a checkpoint when one is configured and present on disk.
    if configs.glob.load_from is not None and os.path.exists(configs.glob.load_from):
        self.load_params(configs.glob.load_from)

    self._initialize()
    logger.info(describe(self))
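# The OUNoise dependency above is not defined in this snippet. A minimal
# Ornstein-Uhlenbeck process sketch with conventional defaults (the mu, theta,
# sigma values are assumptions, not from the source):
import numpy as np

class OUNoise:
    """Temporally correlated exploration noise for continuous actions."""

    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        self.state = np.copy(self.mu)

    def sample(self):
        # dx = theta * (mu - x) + sigma * N(0, 1): a mean-reverting random walk
        dx = self.theta * (self.mu - self.state) \
            + self.sigma * np.random.standard_normal(self.mu.shape)
        self.state = self.state + dx
        return self.state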
def write_log(self, i: int, loss: np.ndarray, score: int, avg_score):
    """Write log about loss and score."""
    total_loss = loss.sum()
    logger.info(
        "episode %d:\t episode step: %d | total step: %d | total score: %d |\t"
        "total loss: %f | actor_loss: %.3f | critic_loss: %.3f\n"
        % (
            i,
            self.episode_step,
            self.total_step,
            score,
            total_loss,
            loss[0],  # actor loss
            loss[1],  # critic loss
        )
    )

    if self._configs.glob.log:
        logger.log_scalar("scores/score", score, i)
        logger.log_scalar("scores/avg_score", avg_score, i)
        logger.log_scalar("losses/total_loss", total_loss, i)
        logger.log_scalar("losses/actor_loss", loss[0], i)
        logger.log_scalar("losses/critic_loss", loss[1], i)
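# write_log expects `loss` as a two-element array with the actor loss first.
# A sketch of how a training step might package it (`actor_loss` and
# `critic_loss` are assumed to be scalar torch tensors, not from the source):
loss = np.array([actor_loss.item(), critic_loss.item()])
self.write_log(self.i_episode, loss, score, avg_score)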
from kayddrl.configs.default import default
from kayddrl.demo.ddpg_test import run
from kayddrl.envs import make_env
from kayddrl.utils.logging import logger
from kayddrl.utils.utils import set_global_seeds

if __name__ == '__main__':
    cfg = default()
    set_global_seeds(cfg.glob.seed)

    env = make_env(cfg.env)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    logger.info("action low: %s | action high: %s | state dim: %d"
                % (env.action_space.low, env.action_space.high, state_dim))

    run(env, cfg, state_dim, action_dim)