Example #1
    def test_reinforce_doesnt_store_invalid_transitions(self):

        STEPS = 35

        env = DummyEnv()
        agent = REINFORCE.from_environment(env, discount_gamma=0.)
        rollout = Rolling(agent, env)

        rollout.roll(STEPS, verbose=0, push_experience=True)

        data = agent.memory_sampler.sample(-1)  # -1: pull every stored transition

        self.assertEqual(agent.episodes, 3)
        np.testing.assert_array_less(data["state"], 10)
        self.assertEqual(len(data["state"]), STEPS - 4)
Example #2
import gym
import numpy as np

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

from trickster.agent import REINFORCE
from trickster.rollout import Trajectory, RolloutConfig
from trickster.utility import visual

env = gym.make("CartPole-v1")
input_shape = env.observation_space.shape
num_actions = env.action_space.n

policy = Sequential([
    Dense(16, activation="relu", input_shape=input_shape),
    Dense(16, activation="relu"),
    Dense(num_actions, activation="softmax")
])
policy.compile(loss="categorical_crossentropy", optimizer=Adam(5e-3))

agent = REINFORCE(policy, action_space=num_actions)

rollout = Trajectory(agent, env, config=RolloutConfig(max_steps=300))

rewards = []
losses = []

for episode in range(1, 501):
    rollout_history = rollout.rollout(verbose=0, push_experience=True)
    agent_history = agent.fit(batch_size=-1, verbose=0, reset_memory=True)

    rewards.append(rollout_history["reward_sum"])
    losses.append(agent_history["loss"])

    print("\rEpisode {:>4} RWD: {:>6.1f}, UTILITY: {: >8.4f}".format(
        episode, np.mean(rewards[-10:]), np.mean(losses[-10:])),
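
The excerpt stops inside the training loop, before any plotting; the original imports trickster.utility.visual for that, but its interface isn't shown here. As a standalone sketch (plain matplotlib, not part of the original example), the collected rewards and losses can be smoothed and plotted like this:

import matplotlib.pyplot as plt

def smooth(series, window=10):
    # moving average so the learning curves are readable
    return np.convolve(series, np.ones(window) / window, mode="valid")

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(smooth(rewards))
ax1.set_ylabel("reward sum")
ax2.plot(smooth(losses))
ax2.set_ylabel("utility")
ax2.set_xlabel("episode")
plt.show()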
Example #3
actor_stream = LeakyReLU()(actor_stream)
actor_stream = Conv2D(64, (3, 3), strides=(2, 2),
                      padding="same")(actor_stream)  # 4
actor_stream = LeakyReLU()(actor_stream)
actor_stream = Conv2D(128, (4, 4))(actor_stream)
actor_stream = LeakyReLU()(actor_stream)

actor_stream = Flatten()(actor_stream)  # collapse the conv feature map before the dense head
actor_stream = Dense(32, activation="relu")(actor_stream)
action_probs = Dense(NUM_MOVES, activation="softmax")(actor_stream)

actor = Model(common_input, action_probs, name="Actor")
actor.compile(Adam(1e-3), "categorical_crossentropy")

agent = REINFORCE(actor,
                  action_space=MOVES,
                  memory=Experience(max_length=10000),
                  discount_factor_gamma=0.99,
                  state_preprocessor=preprocess)

screen = CV2Screen(scale=2)
episode = 0
reward_memory = deque(maxlen=100)
critic_losses = deque(maxlen=50)
actor_losses = deque(maxlen=100)

rollout = MultiTrajectory(agent,
                          envs,
                          warmup_episodes=WARMUP,
                          rollout_configs=RolloutConfig(max_steps=512,
                                                        skipframes=4))
history = {"episode": 0}
Example #4
from collections import deque

from keras.models import Sequential
from keras.layers import Flatten, Dense, BatchNormalization, LeakyReLU
from keras.optimizers import RMSprop

from trickster.agent import REINFORCE
from trickster.rollout import Trajectory, MultiTrajectory
from trickster.experience import Experience

# FakeEnv is defined elsewhere in the original example (not shown here)
envs = [FakeEnv() for _ in range(10)]
test_env = FakeEnv()

actor = Sequential([  # 200, 160
    Flatten(input_shape=test_env.shape),
    Dense(256),
    BatchNormalization(),
    LeakyReLU(),
    Dense(2, activation="softmax")
])
actor.compile(RMSprop(1e-4, rho=0.99), "categorical_crossentropy")

agent = REINFORCE(actor,
                  2,
                  Experience(),
                  discount_factor_gamma=0.99,
                  state_preprocessor=None)

rollout = MultiTrajectory([agent for _ in range(10)], envs)
test_rollout = Trajectory(agent, test_env)

rewards = deque(maxlen=100)
actor_loss = deque(maxlen=100)
actor_utility = deque(maxlen=100)
actor_entropy = deque(maxlen=100)
critic_loss = deque(maxlen=100)

episode = 0

while 1:
Example #5
from trickster.agent import REINFORCE
from trickster.rollout import Trajectory, RolloutConfig
from trickster.utility import gymic
from trickster.model import mlp

env = gymic.rwd_scaled_env()
input_shape = env.observation_space.shape
num_actions = env.action_space.n

policy = mlp.wide_mlp_actor_categorical(input_shape, num_actions, adam_lr=1e-4)
agent = REINFORCE(policy, action_space=num_actions)
rollout = Trajectory(agent, env, config=RolloutConfig(max_steps=300))

rollout.fit(episodes=500, rollouts_per_update=1, update_batch_size=-1)
rollout.render(repeats=10)
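
Example #5 drives everything through the high-level Trajectory.fit helper: rollouts_per_update=1 and update_batch_size=-1 ask for one policy update per episode, using everything gathered in that episode as a single full batch. Roughly the same loop can be written out by hand with the lower-level calls already shown in Example #2 (a sketch of the pattern, not the helper's exact internals):

for episode in range(1, 501):
    rollout_history = rollout.rollout(verbose=0, push_experience=True)
    agent_history = agent.fit(batch_size=-1, verbose=0, reset_memory=True)
    print("\rEpisode {:>4} RWD: {:>6.1f}".format(
        episode, rollout_history["reward_sum"]), end="")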
Example #6
import keras

from trickster.agent import REINFORCE
from trickster.rollout import Trajectory
from trickster.experience import Experience
from trickster.utility import gymic
from trickster.model import mlp

env = gymic.rwd_scaled_env("LunarLander-v2")
input_shape = env.observation_space.shape
num_actions = env.action_space.n

policy = mlp.wide_mlp_actor_categorical(input_shape, num_actions)
policy.compile(optimizer=keras.optimizers.SGD(lr=2e-4, momentum=0.9),
               loss="categorical_crossentropy")

agent = REINFORCE(policy,
                  action_space=num_actions,
                  memory=Experience(max_length=10000),
                  discount_factor_gamma=0.99)

rollout = Trajectory(agent, env)
rollout.fit(episodes=1000, rollouts_per_update=16, update_batch_size=-1)
rollout.render(repeats=10)
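
In the last two examples, discount_factor_gamma controls how far future rewards count toward the Monte Carlo return that REINFORCE uses to weight its log-probability gradients. As a standalone illustration (plain numpy, not trickster code), the discounted return G_t = r_t + gamma * G_{t+1} for one episode can be computed like this:

import numpy as np

def discounted_returns(rewards, gamma=0.99):
    # G_t = r_t + gamma * G_{t+1}, accumulated backwards over the episode
    returns = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

print(discounted_returns([1.0, 1.0, 1.0]))  # -> 2.9701, 1.99, 1.0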