def score_genome(genome, episodes, pbar):
    """Run a DQN agent on CartPole-v1 configured from ``genome`` and score it.

    Args:
        genome: Subscriptable hyperparameter container. The keys read here
            are ``updateEvery``, ``batchSize``, ``gamma``, ``learningRate``,
            ``fc1_units``, ``fc2_units``, ``tau`` and ``memory_size``.
        episodes: Forwarded to ``AgentConfig`` as ``n_episodes``.
        pbar: Forwarded unchanged to ``Runner`` as ``pbar``.

    Returns:
        Tuple ``(score.best_score, checkpoint)`` where ``score`` and
        ``checkpoint`` are the two values returned by ``Runner.run_agent()``.
    """
    # Make the gym
    env = gym.make("CartPole-v1")
    try:
        # Create an agent that solves the environment
        config = AgentConfig(env=env,
                             n_episodes=episodes,
                             target_average=475,
                             update_every=genome['updateEvery'],
                             batch_size=genome['batchSize'],
                             gamma=genome['gamma'],
                             learning_rate=genome['learningRate'],
                             fc1_units=genome['fc1_units'],
                             fc2_units=genome['fc2_units'],
                             tau=genome['tau'],
                             memory_size=genome['memory_size'],
                             deepq_double_learning=True,
                             deepq_dueling_networks=True,
                             per_active=True)
        agent = DQNAgent(config)

        # Create a runner that runs the agent in the environment
        runner = Runner(agent, verbose=1, pbar=pbar)

        # Run the agent
        score, checkpoint = runner.run_agent()
    finally:
        # Close the environment even when a missing genome key or a failed
        # run raises, so repeated genome evaluations do not leak it.
        env.close()

    return score.best_score, checkpoint
def score_genome(genome, episodes, pbar):
    """Run a REINFORCE agent on Acrobot-v1 configured from ``genome`` and score it.

    Args:
        genome: Subscriptable hyperparameter container. The keys read here
            are ``gamma``, ``learningRate`` and ``fc1_units``.
        episodes: Forwarded to ``AgentConfig`` as ``n_episodes``.
        pbar: Forwarded unchanged to ``Runner`` as ``pbar``.

    Returns:
        Tuple ``(score.best_score, checkpoint)`` where ``score`` and
        ``checkpoint`` are the two values returned by
        ``Runner.run_single_probability_trajectory()``.
    """
    # Make the gym
    env = gym.make("Acrobot-v1")
    try:
        # Create an agent that solves the environment:
        # "Acrobot-v1 is an unsolved environment, which means it does not have
        # a specified reward threshold at which it's considered solved."
        config = AgentConfig(env=env,
                             device="cpu",
                             n_episodes=episodes,
                             target_average=-10,
                             max_t=200,
                             gamma=genome['gamma'],
                             learning_rate=genome['learningRate'],
                             fc1_units=genome['fc1_units'])
        agent = REINFORCEAgent(config)

        # Create a runner that runs the agent in the environment
        runner = Runner(agent, verbose=1, pbar=pbar)

        # Run the agent
        score, checkpoint = runner.run_single_probability_trajectory()
    finally:
        # The environment was previously never closed; release it on both
        # the success and the failure path.
        env.close()

    return score.best_score, checkpoint
def random_agent(self):
    """Build an agent for ``self.gym_name`` and call ``Runner.random()`` on it.

    The environment is created with ``gym.make(self.gym_name)``, the agent
    comes from ``self.get_agent`` with a default ``AgentConfig`` for that
    environment, and the environment is closed afterwards.
    """
    env = gym.make(self.gym_name)  # Make the gym
    try:
        agent = self.get_agent(AgentConfig(env=env))  # Create the agent
        runner = Runner(agent)  # Runs the agent in the environment
        runner.random()  # Run the agent
    finally:
        # Close the environment even if agent creation or the run raises.
        env.close()
def enjoy_agent(self, agent_type):
    """Pass the checkpoint matching ``agent_type`` to ``self.enjoy_checkpoint``.

    Args:
        agent_type: ``"evolve"`` selects ``Checkpoints/<gym>_evo.ch`` and
            ``"train"`` selects ``Checkpoints/<gym>_train.ch``, where
            ``<gym>`` is ``self.gym_name`` lower-cased. For any other value
            ``enjoy_checkpoint`` is not called; the environment and agent
            are still created and the environment is still closed.
    """
    env = gym.make(self.gym_name)  # Make the gym
    try:
        agent = self.get_agent(AgentConfig(env=env))  # Create the agent
        runner = Runner(agent)  # Runs the agent in the environment

        # Pick the checkpoint path template for the requested agent type.
        if agent_type == "evolve":
            template = 'Checkpoints/{}_evo.ch'
        elif agent_type == "train":
            template = 'Checkpoints/{}_train.ch'
        else:
            template = None

        if template is not None:
            self.enjoy_checkpoint(
                runner, template.format(self.gym_name.lower()))
    finally:
        # Close the environment even if agent creation or playback raises.
        env.close()
def score_genome(genome, episodes, pbar):
    """Run a DDPG agent on LunarLanderContinuous-v2 configured from ``genome``.

    Args:
        genome: Subscriptable hyperparameter container. The keys read here
            are ``batchSize``, ``gamma``, ``actor_learningRate``,
            ``critic_learningRate``, ``weight_decay``, ``updateEvery``,
            ``fc1_units``, ``fc2_units`` and ``memory_size``.
        episodes: Forwarded to ``AgentConfig`` as ``n_episodes``.
        pbar: Forwarded unchanged to ``Runner`` as ``pbar``.

    Returns:
        Tuple ``(score.best_score, checkpoint)`` where ``score`` and
        ``checkpoint`` are the two values returned by ``Runner.run_agent()``.
    """
    # Make the gym
    env = gym.make("LunarLanderContinuous-v2")
    try:
        # Create an agent that solves the environment
        config = AgentConfig(env=env,
                             seed=42,
                             target_average=200,
                             n_episodes=episodes,
                             batch_size=genome['batchSize'],
                             gamma=genome['gamma'],
                             actor_learning_rate=genome['actor_learningRate'],
                             critic_learning_rate=genome['critic_learningRate'],
                             weight_decay=genome['weight_decay'],
                             update_every=genome['updateEvery'],
                             ou_noise_sigma=0.01,
                             fc1_units=genome['fc1_units'],
                             fc2_units=genome['fc2_units'],
                             memory_size=genome['memory_size'],
                             per_active=False,
                             batch_norm=False)
        agent = DDPGAgent(config)

        # Create a runner that runs the agent in the environment
        runner = Runner(agent, verbose=1, pbar=pbar)

        # Run the agent
        score, checkpoint = runner.run_agent()
    finally:
        # Close the environment even when a missing genome key or a failed
        # run raises, so repeated genome evaluations do not leak it.
        env.close()

    return score.best_score, checkpoint
# Script: run a PPO agent on PongDeterministic-v4 and plot the resulting score.
import gym

# Internals
from Agents.PPO import PPOAgent
from Agents.Common import AgentConfig, Runner
from Agents.Utils.parallelEnv import parallelEnv

# Build the parallel environment wrapper (n=8, fixed seed)
envs = parallelEnv("PongDeterministic-v4", n=8, seed=1234)

# Configure and create the agent
# NOTE(review): ACTION_0 / ACTION_1 look like the two env action indices the
# policy chooses between -- confirm against PPOAgent.
config = AgentConfig(
    parallelEnv=envs,
    n_episodes=1500,
    learning_rate=1e-4,
    fc1_units=256,
    ACTION_0=4,
    ACTION_1=5,
    max_t=500,
)
agent = PPOAgent(config)

# The runner drives the agent in the environment
runner = Runner(agent)

# Collect trajectories and keep the score object that comes back
score = runner.run_parallel_trajectories()

# Plot the score
score.visualize()
# Script: load the evolved DDPG checkpoint for LunarLanderContinuous-v2 and
# hand it to Runner.enjoy_checkpoint.
import gym

from Agents.Common import AgentConfig, Runner
from Agents.DDPG import DDPGAgent

# Environment
env = gym.make("LunarLanderContinuous-v2")

# Agent with a default configuration for this environment
agent = DDPGAgent(AgentConfig(env=env))

# The runner drives the agent in the environment
runner = Runner(agent)

# Play the stored checkpoint, then release the environment
runner.enjoy_checkpoint('Checkpoints/lunarlander_v2_ddpg_evo.ch')
env.close()
# Script: run a REINFORCE agent on Acrobot-v1, saving the best score to a
# checkpoint file, then plot the score.
import gym

from Agents.Common import AgentConfig, Runner
from Agents.REINFORCE import REINFORCEAgent

# Environment
env = gym.make("Acrobot-v1")

# Agent configuration.
# "Acrobot-v1 is an unsolved environment, which means it does not have a
# specified reward threshold at which it's considered solved"
config = AgentConfig(
    env=env,
    n_episodes=300,
    target_average=-100,
    max_t=200,
    gamma=0.92,
    fc1_units=512,
    learning_rate=0.002,
)
agent = REINFORCEAgent(config)

# The runner drives the agent in the environment
runner = Runner(
    agent,
    save_best_score='Checkpoints/acrobot_v1_reinforce_train.ch',
)

# Run the agent
score, checkpoint = runner.run_single_probability_trajectory()

# Plot the score, then release the environment
score.visualize()
env.close()
# Script: build a default REINFORCE agent on Acrobot-v1 and call
# Runner.random() on it.
import gym

from Agents.Common import AgentConfig, Runner
from Agents.REINFORCE import REINFORCEAgent

# Environment
env = gym.make("Acrobot-v1")

# Agent with a default configuration for this environment
agent = REINFORCEAgent(AgentConfig(env=env))

# The runner drives the agent in the environment
runner = Runner(agent)

# Run, then release the environment
runner.random()
env.close()
# Script: load the evolved DQN checkpoint for CartPole-v1 and hand it to
# Runner.enjoy_checkpoint.
import gym

from Agents.Common import AgentConfig, Runner
from Agents.DQN import DQNAgent

# Environment
env = gym.make("CartPole-v1")

# Agent with a default configuration for this environment
agent = DQNAgent(AgentConfig(env=env))

# The runner drives the agent in the environment
runner = Runner(agent)

# Play the stored checkpoint, then release the environment
runner.enjoy_checkpoint('Checkpoints/cartpole_v1_dqn_evo.ch')
env.close()
# Script: run a DQN agent on CartPole-v1, saving the best score to a
# checkpoint file, then plot the score.
import gym

from Agents.Common import AgentConfig, Runner
from Agents.DQN import DQNAgent

# Make the gym
env = gym.make("CartPole-v1")

# Create an agent that solves the environment:
# - CartPole-v1 defines "solving" as getting average reward of 475.0 over
#   100 consecutive trials.
config = AgentConfig(env=env,
                     n_episodes=1000,
                     target_average=475,
                     update_every=1,
                     batch_size=64,
                     gamma=0.95,
                     learning_rate=1e-4,
                     deepq_double_learning=True,
                     deepq_dueling_networks=True,
                     per_active=True)
agent = DQNAgent(config)

# Create a runner that runs the agent in the environment
runner = Runner(agent, save_best_score='Checkpoints/cartpole_v1_dqn_train.ch')

# Run the agent
score, checkpoint = runner.run_agent()

# Visualize the score
score.visualize()

# Release the environment once the run is finished.
env.close()
from Agents.DDPG import DDPGAgent from Agents.Common import AgentConfig from Agents.Common import Runner # Create environment env = gym.make("LunarLanderContinuous-v2") # Create the agent config = AgentConfig(env=env, seed=42, target_average=200, n_episodes=500, batch_norm=False, batch_size=64, gamma=0.98, actor_learning_rate=1e-4, critic_learning_rate=1e-3, weight_decay=0, update_every=1, ou_noise_sigma=0.01, fc1_units=400, fc2_units=300, per_active=False) agent = DDPGAgent(config) # Create a runner that runs the agent in the environment runner = Runner(agent, save_best_score='Checkpoints/lunarlander_v2_ddpg_train.ch') # Run the agent