def __init__(self, seed, cfg: Configuration, num_agents=1):
    """Initialize an Agent object.

    Params
    ======
        seed (int): random seed
        cfg (Configuration): configuration
        num_agents (int): number of agents
    """
    super(DdpgAgent, self).__init__(cfg)

    self.num_agents = cfg.get_current_exp_cfg().environment_cfg.num_agents
    self.state_size = cfg.get_current_exp_cfg().agent_cfg.state_size
    self.action_size = cfg.get_current_exp_cfg().agent_cfg.action_size

    # Actor and Critic networks (each with a target network)
    self.actor_current_model, self.actor_target_model, \
        self.critic_current_model, self.critic_target_model = \
        ModelFactory.create(seed, device, cfg)

    self.actor_optimizer = optim.Adam(
        self.actor_current_model.parameters(),
        lr=self.reinforcement_learning_cfg.ddpg_cfg.lr_actor)
    self.critic_optimizer = optim.Adam(
        self.critic_current_model.parameters(),
        lr=self.reinforcement_learning_cfg.ddpg_cfg.lr_critic,
        weight_decay=self.reinforcement_learning_cfg.ddpg_cfg.weight_decay)

    # Noise process
    self.noise = OUNoiseStandardNormal(self.action_size, seed)

    # Replay memory
    if self.replay_memory_cfg.prioritized_replay:
        self.memory = PrioritizedReplayBuffer(
            int(self.replay_memory_cfg.buffer_size),
            alpha=self.replay_memory_cfg.prioritized_replay_alpha)
        if self.prioritized_replay_beta_iters is None:
            prioritized_replay_beta_iters = self.trainer_cfg.max_steps
        else:
            prioritized_replay_beta_iters = self.prioritized_replay_beta_iters
        self.beta_schedule = LinearSchedule(
            prioritized_replay_beta_iters,
            initial_p=self.replay_memory_cfg.prioritized_replay_beta0,
            final_p=1.0)
    else:
        self.memory = ReplayBuffer(self.replay_memory_cfg.buffer_size)
        self.beta_schedule = None

    # Time step counter (for prioritized replay beta annealing)
    self.step_counter = 0
    # Time step counter (for updating every UPDATE_EVERY steps)
    self.step_update_counter = 0
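
# Illustrative sketch, not part of the codebase: the LinearSchedule built above anneals
# the prioritized-replay importance-sampling exponent beta from prioritized_replay_beta0
# towards 1.0 over max_steps. `linear_beta` is a hypothetical stand-in showing the same
# interpolation.
def linear_beta(step, total_steps, beta0=0.4, beta_final=1.0):
    """Linearly interpolate the importance-sampling exponent from beta0 to beta_final."""
    fraction = min(float(step) / total_steps, 1.0)
    return beta0 + fraction * (beta_final - beta0)

# Example: halfway through training (step 64 of max_steps=128), beta = 0.4 + 0.5 * 0.6 = 0.7.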
def test_record_recordsParameters_multipleSavesOverwrites(self):
    config = Configuration(test_flag=True)
    session_id = Experiment(config).get_session_id()
    experiments_path = config.get_app_experiments_path(train_mode=False)
    model = 'model123'
    header = ['episode', 'step', 'action', 'reward']

    recorder = Recorder(header=header,
                        experiments_path=experiments_path,
                        session_id=session_id,
                        model=model,
                        configuration=config.get_current_exp_cfg())

    header_result = recorder.get_header()
    assert all([a == b for a, b in zip(header, header_result)])

    parameters1 = [1, 1, 0, 0]
    parameters2 = [1, 2, 1, 0]
    parameters3 = [1, 3, 0, 10]

    # episode 1
    recorder.record(parameters1)
    recorder.record(parameters2)
    recorder.record(parameters3)
    recorder.save()

    df = recorder.get_dataframe()
    (config, log) = recorder.load()
    assert df.shape[0] == log.shape[0]

    # episode 2
    recorder.record(parameters1)
    recorder.record(parameters2)
    recorder.record(parameters3)
    recorder.save()

    df = recorder.get_dataframe()
    (config, log) = recorder.load()
    assert df.shape[0] == log.shape[0]
    assert config['session_id'] == session_id
    assert config['model'] == model
def test_listPlayExperiments_experimentsExist_returnsExperiments(self):
    config = Configuration(test_flag=True)
    explorer = Explorer(config=config)

    experiments = explorer.list_play_experiments()

    assert len(experiments) > 0
def __init__(self, cfg: Configuration):
    # training parameters
    self.trainer_cfg = cfg.get_current_exp_cfg().trainer_cfg

    # agent parameters
    self.agent_cfg = cfg.get_current_exp_cfg().agent_cfg

    # replay memory parameters
    self.replay_memory_cfg = cfg.get_current_exp_cfg().replay_memory_cfg
    self.prioritized_replay_beta_iters = None

    # reinforcement learning parameters
    self.reinforcement_learning_cfg = \
        cfg.get_current_exp_cfg().reinforcement_learning_cfg

    self.frames_queue = deque(maxlen=self.agent_cfg.num_frames)
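
# Hedged sketch, not part of the codebase: how a deque with maxlen=num_frames is commonly
# used to stack the most recent observations into a single agent input. `stack_frames` is
# a hypothetical helper; the real agent may preprocess frames differently.
import numpy as np
from collections import deque

def stack_frames(frames_queue: deque, state: np.ndarray) -> np.ndarray:
    """Append the newest state and concatenate the buffered frames into one vector."""
    frames_queue.append(state)
    # At episode start the queue is not yet full; pad by repeating the oldest frame.
    while len(frames_queue) < frames_queue.maxlen:
        frames_queue.appendleft(frames_queue[0])
    return np.concatenate(list(frames_queue), axis=0)

# Example: with num_frames=10 and an 8-dimensional LunarLander state,
# the stacked input has shape (80,).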
def test_listTrainExperiments_selectExperiment_printsConfig(self):
    config = Configuration(test_flag=True)
    explorer = Explorer(config=config)
    experiment = Experiment(config)
    analyzer = Analyzer(config=config, session_id=experiment.get_session_id())

    experiments = explorer.list_train_experiments()

    analyzer.compare_train_config(experiments)
def test_listTrainExperiments_selectExperiment_compareEpochScore(self):
    config = Configuration(test_flag=True)
    explorer = Explorer(config=config)
    experiment = Experiment(config)
    analyzer = Analyzer(config=config, session_id=experiment.get_session_id())

    experiments = explorer.list_train_experiments()

    for experiment in experiments:
        file = analyzer.compare_train_epoch_score(experiment)
        assert file is not None
def create(config: Configuration) -> IEnvironment:
    env_name = config.get_current_exp_cfg().gym_id
    env_type = config.get_current_exp_cfg().environment_cfg.env_type

    if env_type == 'gym':
        env = GymStandardEnv(name=env_name)
    elif env_type == 'atari_gym':
        env = GymAtariEnv(name=env_name)
    elif env_type == 'spaceinvaders_atari_gym':
        env = GymAtariSpaceInvadersEnv(name=env_name)
    elif env_type == 'unity':
        env = UnityEnv(name=env_name)
    elif env_type == 'unity-multiple':
        env = UnityMultipleEnv(name=env_name)
    else:
        raise Exception(
            "Environment '{}' type not supported".format(env_type))

    return env
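
# Design sketch, not the project's implementation: the if/elif chain above can also be
# written as a dispatch table, so supporting a new env_type is a one-line change. The
# environment class names are taken from the branches above; `create_from_registry` is
# a hypothetical alternative.
_ENV_REGISTRY = {
    'gym': GymStandardEnv,
    'atari_gym': GymAtariEnv,
    'spaceinvaders_atari_gym': GymAtariSpaceInvadersEnv,
    'unity': UnityEnv,
    'unity-multiple': UnityMultipleEnv,
}

def create_from_registry(config: Configuration) -> IEnvironment:
    env_type = config.get_current_exp_cfg().environment_cfg.env_type
    env_cls = _ENV_REGISTRY.get(env_type)
    if env_cls is None:
        raise Exception("Environment '{}' type not supported".format(env_type))
    return env_cls(name=config.get_current_exp_cfg().gym_id)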
def __init__(self, cfg: Configuration, session_id, path_models='models'):
    super(MasterTrainer, self).__init__(cfg, session_id)

    reinforcement_learning_cfg = self.cfg.get_current_exp_cfg().reinforcement_learning_cfg
    algorithm_type = cfg.get_current_exp_cfg().reinforcement_learning_cfg.algorithm_type

    if algorithm_type.startswith('ddpg'):
        self.eps = reinforcement_learning_cfg.ddpg_cfg.epsilon_start  # initialize epsilon
        self.eps_end = reinforcement_learning_cfg.ddpg_cfg.epsilon_end
        self.eps_decay = reinforcement_learning_cfg.ddpg_cfg.epsilon_decay
    else:
        self.eps = reinforcement_learning_cfg.dqn_cfg.epsilon_start  # initialize epsilon
        self.eps_end = reinforcement_learning_cfg.dqn_cfg.epsilon_end
        self.eps_decay = reinforcement_learning_cfg.dqn_cfg.epsilon_decay
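
# Hedged sketch of how epsilon_start / epsilon_end / epsilon_decay are typically consumed:
# a multiplicative decay applied once per episode, floored at epsilon_end. `decay_epsilon`
# is a hypothetical helper, not the trainer's actual method.
def decay_epsilon(eps: float, eps_end: float, eps_decay: float) -> float:
    """Decay epsilon geometrically, never dropping below eps_end."""
    return max(eps_end, eps * eps_decay)

# Example: with epsilon_start=1.0, epsilon_decay=0.995 and epsilon_end=0.01,
# epsilon reaches its floor after roughly 920 episodes (0.995 ** 920 is about 0.01).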
def create(cfg: Configuration, session_id) -> ITrainer:
    algorithm_type = cfg.get_current_exp_cfg().reinforcement_learning_cfg.algorithm_type

    if algorithm_type.startswith('dqn'):
        trainer = MasterTrainer(cfg=cfg, session_id=session_id)
    elif algorithm_type.startswith('ddpg'):
        trainer = MasterTrainer(cfg=cfg, session_id=session_id)
    else:
        raise Exception(
            "Trainer for algorithm '{}' type not supported".format(
                algorithm_type))

    return trainer
def create(config: Configuration, seed=0) -> IAgent:
    algorithm_type = config.get_current_exp_cfg().reinforcement_learning_cfg.algorithm_type

    if algorithm_type.startswith('dqn'):
        agent = DqnAgent(seed=seed, cfg=config)
    elif algorithm_type.startswith('ddpg'):
        agent = DdpgAgent(seed=seed, cfg=config)
    else:
        raise Exception(
            "Agent for algorithm '{}' type not supported".format(
                algorithm_type))

    return agent
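
# Hedged composition sketch: how the create() factories above might be wired together.
# The enclosing class names (EnvironmentFactory, AgentFactory, TrainerFactory) are
# assumptions for illustration; only the create() signatures come from this code.
config = Configuration(test_flag=True)
session_id = Experiment(config).get_session_id()

env = EnvironmentFactory.create(config)
agent = AgentFactory.create(config, seed=42)
trainer = TrainerFactory.create(cfg=config, session_id=session_id)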
def get():
    cfg = {
        "experiment_cfgs": [
            {
                "id": "lunarlander",
                "gym_id": "LunarLander-v2",
                "agent_cfg": {"action_size": 4, "discrete": True, "num_frames": 1,
                              "state_rgb": False, "state_size": 8},
                "environment_cfg": {"env_type": "gym", "num_agents": 1},
                "reinforcement_learning_cfg": {
                    "algorithm_type": "dqn",
                    "dqn_cfg": {"epsilon_start": 1.0, "epsilon_end": 0.01,
                                "epsilon_decay": 0.995, "lr": 0.0001,
                                "model_cfg": {"hidden_layers": [64, 64]}},
                    "ddpg_cfg": None
                },
                "replay_memory_cfg": {"buffer_size": 100000, "prioritized_replay": True,
                                      "prioritized_replay_alpha": 0.6,
                                      "prioritized_replay_beta0": 0.4,
                                      "prioritized_replay_eps": 1e-06},
                "trainer_cfg": {"batch_size": 64, "eval_frequency": 16, "eval_steps": 4,
                                "gamma": 0.99, "human_flag": False,
                                "max_episode_steps": 2, "max_steps": 128, "tau": 0.001,
                                "update_every": 4, "num_updates": 1}
            },
            {
                "id": "breakout",
                "gym_id": "Breakout-ram-v4",
                "agent_cfg": {"action_size": 3, "discrete": True, "num_frames": 1,
                              "state_rgb": False, "state_size": 128},
                "environment_cfg": {"env_type": "spaceinvaders_atari_gym", "num_agents": 1},
                "reinforcement_learning_cfg": {
                    "algorithm_type": "dqn",
                    "dqn_cfg": {"epsilon_start": 1.0, "epsilon_end": 0.01,
                                "epsilon_decay": 0.995, "lr": 0.0001,
                                "model_cfg": {"hidden_layers": [64, 64]}},
                    "ddpg_cfg": None
                },
                "replay_memory_cfg": {"buffer_size": 100000, "prioritized_replay": True,
                                      "prioritized_replay_alpha": 0.6,
                                      "prioritized_replay_beta0": 0.4,
                                      "prioritized_replay_eps": 1e-06},
                "trainer_cfg": {"batch_size": 64, "eval_frequency": 16, "eval_steps": 4,
                                "gamma": 0.99, "human_flag": False,
                                "max_episode_steps": 2, "max_steps": 128, "tau": 0.001,
                                "update_every": 4, "num_updates": 1}
            },
            {
                "id": "breakout-rgb",
                "gym_id": "Breakout-v4",
                "agent_cfg": {"action_size": 3, "discrete": True, "num_frames": 1,
                              "state_rgb": True, "state_size": [80, 80]},
                "environment_cfg": {"env_type": "spaceinvaders_atari_gym", "num_agents": 1},
                "reinforcement_learning_cfg": {
                    "algorithm_type": "dqn",
                    "dqn_cfg": {"epsilon_start": 1.0, "epsilon_end": 0.01,
                                "epsilon_decay": 0.995, "lr": 0.0001,
                                "model_cfg": {"hidden_layers": [64, 64]}},
                    "ddpg_cfg": None
                },
                "replay_memory_cfg": {"buffer_size": 100000, "prioritized_replay": True,
                                      "prioritized_replay_alpha": 0.6,
                                      "prioritized_replay_beta0": 0.4,
                                      "prioritized_replay_eps": 1e-06},
                "trainer_cfg": {"batch_size": 64, "eval_frequency": 16, "eval_steps": 4,
                                "gamma": 0.99, "human_flag": False,
                                "max_episode_steps": 2, "max_steps": 128, "tau": 0.001,
                                "update_every": 4, "num_updates": 1}
            },
            {
                "id": "banana",
                "gym_id": "env/unity/mac/banana.app",
                "agent_cfg": {"action_size": 4, "discrete": True, "num_frames": 1,
                              "state_rgb": False, "state_size": 37},
                "environment_cfg": {"env_type": "unity", "num_agents": 1},
                "reinforcement_learning_cfg": {
                    "algorithm_type": "dqn_double",
                    "dqn_cfg": {"epsilon_start": 1.0, "epsilon_end": 0.01,
                                "epsilon_decay": 0.995, "lr": 0.0001,
                                "model_cfg": {"hidden_layers": [64, 64]}},
                    "ddpg_cfg": None
                },
                "replay_memory_cfg": {"buffer_size": 100000, "prioritized_replay": True,
                                      "prioritized_replay_alpha": 0.6,
                                      "prioritized_replay_beta0": 0.4,
                                      "prioritized_replay_eps": 1e-06},
                "trainer_cfg": {"batch_size": 64, "eval_frequency": 16, "eval_steps": 4,
                                "gamma": 0.99, "human_flag": False,
                                "max_episode_steps": 2, "max_steps": 128, "tau": 0.001,
                                "update_every": 4, "num_updates": 1}
            }
        ]
    }
    return Configuration(test_flag=True, exp_cfg=cfg)
def get():
    cfg = {
        "experiment_cfgs": [
            {
                "id": "lunarlander-dqn",
                "gym_id": "LunarLander-v2",
                "agent_cfg": {"action_size": 4, "discrete": True, "num_frames": 1,
                              "state_rgb": False, "state_size": 8},
                "environment_cfg": {"env_type": "gym", "num_agents": 1},
                "reinforcement_learning_cfg": {
                    "algorithm_type": "dqn",
                    "dqn_cfg": {"epsilon_start": 1.0, "epsilon_end": 0.01,
                                "epsilon_decay": 0.995, "lr": 0.0001,
                                "model_cfg": {"hidden_layers": [64, 64]}},
                    "ddpg_cfg": None
                },
                "replay_memory_cfg": {"buffer_size": 100000, "prioritized_replay": True,
                                      "prioritized_replay_alpha": 0.6,
                                      "prioritized_replay_beta0": 0.4,
                                      "prioritized_replay_eps": 1e-06},
                "trainer_cfg": {"batch_size": 64, "eval_frequency": 16, "eval_steps": 4,
                                "gamma": 0.99, "human_flag": False,
                                "max_episode_steps": 2, "max_steps": 128, "tau": 0.001,
                                "update_every": 4, "num_updates": 1}
            },
            {
                "id": "lunarlander-dqn-withframes",
                "gym_id": "LunarLander-v2",
                "agent_cfg": {"action_size": 4, "discrete": True, "num_frames": 10,
                              "state_rgb": False, "state_size": 8},
                "environment_cfg": {"env_type": "gym", "num_agents": 1},
                "reinforcement_learning_cfg": {
                    "algorithm_type": "dqn",
                    "dqn_cfg": {"epsilon_start": 1.0, "epsilon_end": 0.01,
                                "epsilon_decay": 0.995, "lr": 0.0001,
                                "model_cfg": {"hidden_layers": [64, 64]}},
                    "ddpg_cfg": None
                },
                "replay_memory_cfg": {"buffer_size": 100000, "prioritized_replay": True,
                                      "prioritized_replay_alpha": 0.6,
                                      "prioritized_replay_beta0": 0.4,
                                      "prioritized_replay_eps": 1e-06},
                "trainer_cfg": {"batch_size": 64, "eval_frequency": 16, "eval_steps": 4,
                                "gamma": 0.99, "human_flag": False,
                                "max_episode_steps": 2, "max_steps": 128, "tau": 0.001,
                                "update_every": 4, "num_updates": 1}
            },
            {
                "id": "lunarlander-dqn-noprio",
                "gym_id": "LunarLander-v2",
                "agent_cfg": {"action_size": 4, "discrete": True, "num_frames": 1,
                              "state_rgb": False, "state_size": 8},
                "environment_cfg": {"env_type": "gym", "num_agents": 1},
                "reinforcement_learning_cfg": {
                    "algorithm_type": "dqn",
                    "dqn_cfg": {"epsilon_start": 1.0, "epsilon_end": 0.01,
                                "epsilon_decay": 0.995, "lr": 0.0001,
                                "model_cfg": {"hidden_layers": [64, 64]}},
                    "ddpg_cfg": None
                },
                "replay_memory_cfg": {"buffer_size": 100000, "prioritized_replay": False,
                                      "prioritized_replay_alpha": 0.6,
                                      "prioritized_replay_beta0": 0.4,
                                      "prioritized_replay_eps": 1e-06},
                "trainer_cfg": {"batch_size": 64, "eval_frequency": 16, "eval_steps": 4,
                                "gamma": 0.99, "human_flag": False,
                                "max_episode_steps": 2, "max_steps": 128, "tau": 0.001,
                                "update_every": 4, "num_updates": 1}
            },
            {
                "id": "lunarlander-dqn-dueling",
                "gym_id": "LunarLander-v2",
                "agent_cfg": {"action_size": 4, "discrete": True, "num_frames": 1,
                              "state_rgb": False, "state_size": 8},
                "environment_cfg": {"env_type": "gym", "num_agents": 1},
                "reinforcement_learning_cfg": {
                    "algorithm_type": "dqn_dueling",
                    "dqn_cfg": {"epsilon_start": 1.0, "epsilon_end": 0.01,
                                "epsilon_decay": 0.995, "lr": 0.0001,
                                "model_cfg": {"hidden_layers": [64, 64]}},
                    "ddpg_cfg": None
                },
                "replay_memory_cfg": {"buffer_size": 100000, "prioritized_replay": True,
                                      "prioritized_replay_alpha": 0.6,
                                      "prioritized_replay_beta0": 0.4,
                                      "prioritized_replay_eps": 1e-06},
                "trainer_cfg": {"batch_size": 64, "eval_frequency": 16, "eval_steps": 4,
                                "gamma": 0.99, "human_flag": False,
                                "max_episode_steps": 2, "max_steps": 128, "tau": 0.001,
                                "update_every": 4, "num_updates": 1}
            },
            {
                "id": "lunarlander-dqn-double",
                "gym_id": "LunarLander-v2",
                "agent_cfg": {"action_size": 4, "discrete": True, "num_frames": 1,
                              "state_rgb": False, "state_size": 8},
                "environment_cfg": {"env_type": "gym", "num_agents": 1},
                "reinforcement_learning_cfg": {
                    "algorithm_type": "dqn_double",
                    "dqn_cfg": {"epsilon_start": 1.0, "epsilon_end": 0.01,
                                "epsilon_decay": 0.995, "lr": 0.0001,
                                "model_cfg": {"hidden_layers": [64, 64]}},
                    "ddpg_cfg": None
                },
                "replay_memory_cfg": {"buffer_size": 100000, "prioritized_replay": True,
                                      "prioritized_replay_alpha": 0.6,
                                      "prioritized_replay_beta0": 0.4,
                                      "prioritized_replay_eps": 1e-06},
                "trainer_cfg": {"batch_size": 64, "eval_frequency": 16, "eval_steps": 4,
                                "gamma": 0.99, "human_flag": False,
                                "max_episode_steps": 2, "max_steps": 128, "tau": 0.001,
                                "update_every": 4, "num_updates": 1}
            },
            {
                "id": "walker-ddpg",
                "gym_id": "BipedalWalker-v3",
                "agent_cfg": {"action_size": 4, "discrete": True, "num_frames": 1,
                              "state_rgb": False, "state_size": 24},
                "environment_cfg": {"env_type": "gym", "num_agents": 1},
                "reinforcement_learning_cfg": {
                    "algorithm_type": "ddpg",
                    "dqn_cfg": None,
                    "ddpg_cfg": {"epsilon_start": 1.0, "epsilon_end": 0.01,
                                 "epsilon_decay": 0.995, "lr_actor": 0.0001,
                                 "lr_critic": 0.0003, "weight_decay": 0.0001,
                                 "actor_model_cfg": {"hidden_layers": [64, 64]},
                                 "critic_model_cfg": {"hidden_layers": [128, 128]}}
                },
                "replay_memory_cfg": {"buffer_size": 100000, "prioritized_replay": False,
                                      "prioritized_replay_alpha": 0.6,
                                      "prioritized_replay_beta0": 0.4,
                                      "prioritized_replay_eps": 1e-06},
                "trainer_cfg": {"batch_size": 64, "eval_frequency": 16, "eval_steps": 4,
                                "gamma": 0.99, "human_flag": False,
                                "max_episode_steps": 2, "max_steps": 128, "tau": 0.001,
                                "update_every": 4, "num_updates": 1}
            }
        ]
    }
    return Configuration(test_flag=True, exp_cfg=cfg)