def score_genome(genome, episodes, pbar):
    """Train a DQN agent on CartPole-v1 using the genome's hyperparameters.

    Args:
        genome: Mapping that provides the keys 'updateEvery', 'batchSize',
            'gamma', 'learningRate', 'fc1_units', 'fc2_units', 'tau' and
            'memory_size'.
        episodes: Forwarded to ``AgentConfig`` as ``n_episodes``.
        pbar: Forwarded unchanged to ``Runner``.

    Returns:
        The tuple ``(score.best_score, checkpoint)`` built from the result
        of ``runner.run_agent()``.
    """
    # Make the gym
    env = gym.make("CartPole-v1")

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even when configuration or training raises.
    try:
        # Create an agent that solves the environment:
        config = AgentConfig(env=env,
                             n_episodes=episodes,
                             target_average=475,
                             update_every=genome['updateEvery'],
                             batch_size=genome['batchSize'],
                             gamma=genome['gamma'],
                             learning_rate=genome['learningRate'],
                             fc1_units=genome['fc1_units'],
                             fc2_units=genome['fc2_units'],
                             tau=genome['tau'],
                             memory_size=genome['memory_size'],
                             deepq_double_learning=True,
                             deepq_dueling_networks=True,
                             per_active=True)
        agent = DQNAgent(config)

        # Create a runner that runs the agent in the environment
        runner = Runner(agent, verbose=1, pbar=pbar)

        # Run the agent
        score, checkpoint = runner.run_agent()
    finally:
        env.close()

    return score.best_score, checkpoint
def score_genome(genome, episodes, pbar):
    """Train a REINFORCE agent on Acrobot-v1 using the genome's hyperparameters.

    Args:
        genome: Mapping that provides the keys 'gamma', 'learningRate' and
            'fc1_units'.
        episodes: Forwarded to ``AgentConfig`` as ``n_episodes``.
        pbar: Forwarded unchanged to ``Runner``.

    Returns:
        The tuple ``(score.best_score, checkpoint)`` built from the result
        of ``runner.run_single_probability_trajectory()``.
    """
    # Make the gym
    env = gym.make("Acrobot-v1")

    # The environment is closed in ``finally`` so it is released both after
    # a normal run and when configuration or training raises.
    try:
        # Create an agent that solves the environment:
        # "Acrobot-v1 is an unsolved environment, which means it does not
        # have a specified reward threshold at which it's considered solved."
        config = AgentConfig(env=env,
                             device="cpu",
                             n_episodes=episodes,
                             target_average=-10,
                             max_t=200,
                             gamma=genome['gamma'],
                             learning_rate=genome['learningRate'],
                             fc1_units=genome['fc1_units'])
        agent = REINFORCEAgent(config)

        # Create a runner that runs the agent in the environment
        runner = Runner(agent, verbose=1, pbar=pbar)

        # Run the agent
        score, checkpoint = runner.run_single_probability_trajectory()
    finally:
        env.close()

    return score.best_score, checkpoint
    def random_agent(self):
        """Create an agent for ``self.gym_name`` and call ``Runner.random()``.

        The agent is obtained from ``self.get_agent`` with an ``AgentConfig``
        that wraps a freshly made environment. The environment is closed
        afterwards, including when agent creation or the run raises.
        """
        env = gym.make(self.gym_name)  # Make the gym
        try:
            agent = self.get_agent(AgentConfig(env=env))  # Create the agent
            # Create a runner that runs the agent in the environment
            runner = Runner(agent)
            runner.random()  # Run the agent
        finally:
            env.close()
    def enjoy_agent(self, agent_type):
        """Replay a saved checkpoint for the agent of ``self.gym_name``.

        Args:
            agent_type: ``"evolve"`` selects
                ``Checkpoints/<gym_name lowercased>_evo.ch`` and ``"train"``
                selects ``Checkpoints/<gym_name lowercased>_train.ch``. Any
                other value replays nothing.

        The environment is closed afterwards, including when agent creation
        or the replay raises.
        """
        env = gym.make(self.gym_name)  # Make the gym
        try:
            agent = self.get_agent(AgentConfig(env=env))  # Create the agent
            # Create a runner that runs the agent in the environment
            runner = Runner(agent)

            if agent_type == "evolve":
                self.enjoy_checkpoint(
                    runner,
                    'Checkpoints/{}_evo.ch'.format(self.gym_name.lower()))
            elif agent_type == "train":
                self.enjoy_checkpoint(
                    runner,
                    'Checkpoints/{}_train.ch'.format(self.gym_name.lower()))
        finally:
            env.close()
Example #5
0
def score_genome(genome, episodes, pbar):
    """Train a DDPG agent on LunarLanderContinuous-v2 using the genome.

    Args:
        genome: Mapping that provides the keys 'batchSize', 'gamma',
            'actor_learningRate', 'critic_learningRate', 'weight_decay',
            'updateEvery', 'fc1_units', 'fc2_units' and 'memory_size'.
        episodes: Forwarded to ``AgentConfig`` as ``n_episodes``.
        pbar: Forwarded unchanged to ``Runner``.

    Returns:
        The tuple ``(score.best_score, checkpoint)`` built from the result
        of ``runner.run_agent()``.
    """
    # Make the gym
    env = gym.make("LunarLanderContinuous-v2")

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even when configuration or training raises.
    try:
        # Create an agent that solves the environment:
        config = AgentConfig(env=env,
                             seed=42,
                             target_average=200,
                             n_episodes=episodes,
                             batch_size=genome['batchSize'],
                             gamma=genome['gamma'],
                             actor_learning_rate=genome['actor_learningRate'],
                             critic_learning_rate=genome['critic_learningRate'],
                             weight_decay=genome['weight_decay'],
                             update_every=genome['updateEvery'],
                             ou_noise_sigma=0.01,
                             fc1_units=genome['fc1_units'],
                             fc2_units=genome['fc2_units'],
                             memory_size=genome['memory_size'],
                             per_active=False,
                             batch_norm=False)

        agent = DDPGAgent(config)

        # Create a runner that runs the agent in the environment
        runner = Runner(agent, verbose=1, pbar=pbar)

        # Run the agent
        score, checkpoint = runner.run_agent()
    finally:
        env.close()

    return score.best_score, checkpoint
Example #6
0
import gym

# Internals
from Agents.PPO import PPOAgent
from Agents.Common import AgentConfig
from Agents.Common import Runner
from Agents.Utils.parallelEnv import parallelEnv

# Build the parallelEnv wrapper for PongDeterministic-v4 (n=8, seed=1234)
envs = parallelEnv("PongDeterministic-v4", n=8, seed=1234)

# Settings handed to the PPO agent
config = AgentConfig(parallelEnv=envs,
                     n_episodes=1500,
                     learning_rate=1e-4,
                     fc1_units=256,
                     ACTION_0=4,
                     ACTION_1=5,
                     max_t=500)

# Instantiate the agent and the runner that drives it
agent = PPOAgent(config)
runner = Runner(agent)

# Run the parallel trajectories, then visualize the resulting score
score = runner.run_parallel_trajectories()
score.visualize()
Example #7
0
import gym
from Agents.Common import Runner
from Agents.DDPG import DDPGAgent
from Agents.Common import AgentConfig
from Agents.Common import Runner

# Make the gym
env = gym.make("LunarLanderContinuous-v2")

# Close the environment even if building the agent or the replay raises.
try:
    # Create the agent
    agent = DDPGAgent(AgentConfig(env=env))

    # Create a runner that runs the agent in the environment
    runner = Runner(agent)

    # Replay the saved checkpoint
    runner.enjoy_checkpoint('Checkpoints/lunarlander_v2_ddpg_evo.ch')
finally:
    env.close()
Example #8
0
import gym
from Agents.Common import Runner
from Agents.REINFORCE import REINFORCEAgent
from Agents.Common import AgentConfig
from Agents.Common import Runner

# Make the gym
env = gym.make("Acrobot-v1")

# Close the environment even if configuration, training or plotting raises.
try:
    # Create an agent that solves the environment:
    # "Acrobot-v1 is an unsolved environment, which means it does not have a
    # specified reward threshold at which it's considered solved"
    config = AgentConfig(env=env,
                         n_episodes=300,
                         target_average=-100,
                         max_t=200,
                         gamma=0.92,
                         fc1_units=512,
                         learning_rate=0.002)
    agent = REINFORCEAgent(config)

    # Create a runner that runs the agent in the environment
    runner = Runner(
        agent,
        save_best_score='Checkpoints/acrobot_v1_reinforce_train.ch')

    # Run the agent
    score, checkpoint = runner.run_single_probability_trajectory()

    # Visualize the score
    score.visualize()
finally:
    env.close()
import gym
from Agents.Common import Runner
from Agents.REINFORCE import REINFORCEAgent
from Agents.Common import AgentConfig
from Agents.Common import Runner

# Make the gym
env = gym.make("Acrobot-v1")

# Close the environment even if building the agent or the run raises.
try:
    # Create the agent
    agent = REINFORCEAgent(AgentConfig(env=env))

    # Create a runner that runs the agent in the environment
    runner = Runner(agent)

    # Run the agent
    runner.random()
finally:
    env.close()
import gym
from Agents.Common import Runner
from Agents.DQN import DQNAgent
from Agents.Common import AgentConfig
from Agents.Common import Runner

# Make the gym
env = gym.make("CartPole-v1")

# Close the environment even if building the agent or the replay raises.
try:
    # Create the agent
    agent = DQNAgent(AgentConfig(env=env))

    # Create a runner that runs the agent in the environment
    runner = Runner(agent)

    # Replay the saved checkpoint
    runner.enjoy_checkpoint('Checkpoints/cartpole_v1_dqn_evo.ch')
finally:
    env.close()
Example #11
0
import gym
from Agents.Common import Runner
from Agents.DQN import DQNAgent
from Agents.Common import AgentConfig
from Agents.Common import Runner

# Make the gym
env = gym.make("CartPole-v1")

# The environment is closed in ``finally`` so it is released both after a
# normal run and when configuration, training or plotting raises.
try:
    # Create an agent that solves the environment:
    # - CartPole-v1 defines "solving" as getting average reward of 475.0
    #   over 100 consecutive trials.
    config = AgentConfig(env=env,
                         n_episodes=1000,
                         target_average=475,
                         update_every=1,
                         batch_size=64,
                         gamma=0.95,
                         learning_rate=1e-4,
                         deepq_double_learning=True,
                         deepq_dueling_networks=True,
                         per_active=True)
    agent = DQNAgent(config)

    # Create a runner that runs the agent in the environment
    runner = Runner(agent,
                    save_best_score='Checkpoints/cartpole_v1_dqn_train.ch')

    # Run the agent
    score, checkpoint = runner.run_agent()

    # Visualize the score
    score.visualize()
finally:
    env.close()
Example #12
0
from Agents.DDPG import DDPGAgent
from Agents.Common import AgentConfig
from Agents.Common import Runner

# Create environment
env = gym.make("LunarLanderContinuous-v2")

# Create the agent
config = AgentConfig(env=env,
                     seed=42,
                     target_average=200,
                     n_episodes=500,
                     batch_norm=False,
                     batch_size=64,
                     gamma=0.98,
                     actor_learning_rate=1e-4,
                     critic_learning_rate=1e-3,
                     weight_decay=0,
                     update_every=1,
                     ou_noise_sigma=0.01,
                     fc1_units=400,
                     fc2_units=300,
                     per_active=False)

agent = DDPGAgent(config)

# Create a runner that runs the agent in the environment
runner = Runner(agent,
                save_best_score='Checkpoints/lunarlander_v2_ddpg_train.ch')

# Run the agent