Example #1
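# PPO with a categorical MLP actor and an MLP critic on LunarLander-v2,
# collecting rollouts from 32 parallel environments.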
from trickster.agent import PPO
from trickster.rollout import MultiRolling, Trajectory
from trickster.utility import gymic
from trickster.model import mlp

envs = [gymic.rwd_scaled_env("LunarLander-v2") for _ in range(32)]
test_env = gymic.rwd_scaled_env("LunarLander-v2")

input_shape = envs[0].observation_space.shape
num_actions = envs[0].action_space.n

actor = mlp.wide_mlp_actor_categorical(input_shape, num_actions, adam_lr=1e-4)
critic = mlp.wide_mlp_critic_network(input_shape, output_dim=1, adam_lr=1e-4)
agent = PPO(actor,
            critic,
            action_space=num_actions,
            discount_factor_gamma=0.99,
            entropy_penalty_coef=0.05)

rollout = MultiRolling(agent, envs)
test_rollout = Trajectory(agent, test_env)

rollout.fit(episodes=1000,
            updates_per_episode=1,
            steps_per_update=32,
            update_batch_size=32,
            testing_rollout=test_rollout,
            plot_curves=True)
test_rollout.render(repeats=10)
Example #2
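# TD3 on LunarLanderContinuous-v2: a continuous-action MLP actor with two critics,
# Gaussian exploration noise, clipped target-policy noise and a replay buffer.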
from trickster.agent import TD3
from trickster.rollout import Trajectory, Rolling, RolloutConfig
from trickster.utility import spaces, gymic
from trickster.experience import Experience
from trickster.model import mlp

env = gymic.rwd_scaled_env("LunarLanderContinuous-v2", reward_scale=0.01)

input_shape = env.observation_space.shape
num_actions = env.action_space.shape[0]

actor, critics = mlp.wide_ddpg_actor_critic(input_shape,
                                            output_dim=num_actions,
                                            action_range=2,
                                            num_critics=2,
                                            actor_lr=5e-4,
                                            critic_lr=5e-4)

agent = TD3(actor,
            critics,
            action_space=spaces.CONTINUOUS,
            memory=Experience(max_length=int(1e4)),
            discount_factor_gamma=0.99,
            action_noise_sigma=0.1,
            action_noise_sigma_decay=1.,
            action_minima=-2,
            action_maxima=2,
            target_noise_sigma=0.2,
            target_noise_clip=0.5)

rollout = Rolling(agent, env)
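
# Evaluation rollout and a training call modeled on the other examples in this
# listing; the episode/update counts below are assumed, not taken from this snippet.
test_rollout = Trajectory(agent, gymic.rwd_scaled_env("LunarLanderContinuous-v2", reward_scale=0.01))

rollout.fit(episodes=1000,
            updates_per_episode=32,
            step_per_update=2,
            update_batch_size=64,
            testing_rollout=test_rollout,
            plot_curves=True)
test_rollout.render(repeats=10)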
Example #3
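# Double DQN on CartPole-v1 with an epsilon-greedy policy (epsilon annealed from 1.0
# towards 0.1), experience replay and episodes capped at 300 steps.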
from trickster.agent import DoubleDQN
from trickster.rollout import Rolling, Trajectory, RolloutConfig
from trickster.experience import Experience
from trickster.model import mlp
from trickster.utility import gymic

env = gymic.rwd_scaled_env("CartPole-v1")
test_env = gymic.rwd_scaled_env("CartPole-v1")

input_shape = env.observation_space.shape
num_actions = env.action_space.n

ann = mlp.wide_mlp_critic_network(input_shape, num_actions, adam_lr=1e-3)

agent = DoubleDQN(ann,
                  action_space=env.action_space,
                  memory=Experience(max_length=10000),
                  epsilon=1.,
                  epsilon_decay=0.99995,
                  epsilon_min=0.1,
                  discount_factor_gamma=0.98)


rollout = Rolling(agent, env, config=RolloutConfig(max_steps=300))
test_rollout = Trajectory(agent, test_env)

rollout.fit(episodes=500, updates_per_episode=32, step_per_update=2, update_batch_size=32,
            testing_rollout=test_rollout, plot_curves=True)
test_rollout.render(repeats=10)
Example #4
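# A2C with an MLP actor-critic pair and a small entropy penalty, trained in a
# single rolling environment with episodes capped at 300 steps.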
from trickster.agent import A2C
from trickster.rollout import Rolling, Trajectory, RolloutConfig
from trickster.model import mlp
from trickster.utility import gymic

env = gymic.rwd_scaled_env()
input_shape = env.observation_space.shape
num_actions = env.action_space.n

actor, critic = mlp.wide_pg_actor_critic(input_shape, num_actions)

agent = A2C(actor,
            critic,
            action_space=env.action_space,
            discount_factor_gamma=0.98,
            entropy_penalty_coef=0.01)

rollout = Rolling(agent, env, config=RolloutConfig(max_steps=300))
test_rollout = Trajectory(agent, gymic.rwd_scaled_env())

rollout.fit(episodes=1000, updates_per_episode=64, step_per_update=1, testing_rollout=test_rollout, plot_curves=True)
test_rollout.render(repeats=10)
Example #5
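# DQN with a target network on CartPole-v1, gathering experience from 8 parallel
# environments into the agent's replay buffer.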
from trickster.agent import DQN
from trickster.experience import Experience
from trickster.rollout import MultiRolling, RolloutConfig, Trajectory
from trickster.utility import gymic
from trickster.model import mlp

envs = [gymic.rwd_scaled_env("CartPole-v1") for _ in range(8)]
test_env = gymic.rwd_scaled_env("CartPole-v1")

input_shape = envs[0].observation_space.shape
num_actions = envs[0].action_space.n

ann = mlp.wide_mlp_critic_network(input_shape, num_actions, adam_lr=1e-4)

agent = DQN(ann,
            action_space=2,
            memory=Experience(max_length=10000),
            epsilon=1.,
            epsilon_decay=0.99995,
            epsilon_min=0.1,
            discount_factor_gamma=0.98,
            use_target_network=True)

rollout = MultiRolling(agent,
                       envs,
                       rollout_configs=RolloutConfig(max_steps=300))
test_rollout = Trajectory(agent, test_env)

# The remaining fit arguments and the render call are assumed, mirroring Example #1.
rollout.fit(episodes=500,
            updates_per_episode=128,
            steps_per_update=1,
            update_batch_size=32,
            testing_rollout=test_rollout,
            plot_curves=True)
test_rollout.render(repeats=10)
Example #6
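# REINFORCE with a categorical MLP policy, updated after every completed episode trajectory.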
from trickster.agent import REINFORCE
from trickster.rollout import Trajectory, RolloutConfig
from trickster.utility import gymic
from trickster.model import mlp

env = gymic.rwd_scaled_env()
input_shape = env.observation_space.shape
num_actions = env.action_space.n

policy = mlp.wide_mlp_actor_categorical(input_shape, num_actions, adam_lr=1e-4)
agent = REINFORCE(policy, action_space=num_actions)
rollout = Trajectory(agent, env, config=RolloutConfig(max_steps=300))

rollout.fit(episodes=500, rollouts_per_update=1, update_batch_size=-1)
rollout.render(repeats=10)
Example #7
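# A2C with an MLP actor-critic pair, trained across 8 parallel environments.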
from trickster.agent import A2C
from trickster.rollout import MultiRolling, Trajectory
from trickster.utility import gymic
from trickster.model import mlp

envs = [gymic.rwd_scaled_env() for _ in range(8)]
input_shape = envs[0].observation_space.shape
num_actions = envs[0].action_space.n

actor, critic = mlp.wide_pg_actor_critic(input_shape,
                                         num_actions,
                                         critic_lr=5e-4)

agent = A2C(actor,
            critic,
            action_space=envs[0].action_space,
            discount_factor_gamma=0.98,
            entropy_penalty_coef=0.05)

rollout = MultiRolling(agent, envs)
test_rollout = Trajectory(agent, gymic.rwd_scaled_env())

rollout.fit(episodes=300,
            updates_per_episode=128,
            steps_per_update=1,
            testing_rollout=test_rollout)
test_rollout.render()
Example #8
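# REINFORCE on LunarLander-v2 with a replay buffer, recompiling the categorical
# policy to use SGD with momentum instead of the builder's default optimizer.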
import keras

from trickster.agent import REINFORCE
from trickster.rollout import Trajectory
from trickster.experience import Experience
from trickster.utility import gymic
from trickster.model import mlp

env = gymic.rwd_scaled_env("LunarLander-v2")
input_shape = env.observation_space.shape
num_actions = env.action_space.n

policy = mlp.wide_mlp_actor_categorical(input_shape, num_actions)
policy.compile(optimizer=keras.optimizers.SGD(lr=2e-4, momentum=0.9),
               loss="categorical_crossentropy")

agent = REINFORCE(policy,
                  action_space=num_actions,
                  memory=Experience(max_length=10000),
                  discount_factor_gamma=0.99)

rollout = Trajectory(agent, env)
rollout.fit(episodes=1000, rollouts_per_update=16, update_batch_size=-1)
rollout.render(repeats=10)