Example #1
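    # Unit-test excerpt from the library's test suite: the surrounding TestCase
    # class, the DummyEnv helper and the numpy / trickster imports are not shown
    # here. The test rolls an A2C agent for a fixed number of steps and checks
    # that transitions which are invalid in the temporal-difference setting
    # (presumably those crossing episode boundaries) are not stored in memory.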
    def test_a2c_doesnt_store_invalid_transitions_in_td_setting(self):

        STEPS = 35

        env = DummyEnv()
        agent = A2C.from_environment(env, discount_gamma=0.)
        rollout = Rolling(agent, env)

        rollout.roll(STEPS, verbose=0, push_experience=True)

        data = agent.memory_sampler.sample(-1)

        self.assertEqual(agent.episodes, 3)
        np.testing.assert_array_less(data["state"], 10)
        self.assertEqual(len(data["state"]), STEPS - 4)
Example #2
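# NOTE: this excerpt begins mid-script. The setup below is a plausible
# reconstruction added for readability: the import paths, the CartPole-v1
# environment (matching the test rollout further down) and the actor's hidden
# layers (mirroring the critic) are assumptions, not part of the original.
import gym

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

from trickster.agent import A2C
from trickster.experience import Experience
from trickster.rollout import Rolling, Trajectory, RolloutConfig

env = gym.make("CartPole-v1")
input_shape = env.observation_space.shape
num_actions = env.action_space.n

actor = Sequential([
    Dense(16,
          activation="relu",
          input_shape=input_shape,
          kernel_initializer="he_uniform"),
    Dense(16, activation="relu", kernel_initializer="he_uniform"),
    Dense(num_actions, activation="softmax", kernel_initializer="he_uniform")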
])
actor.compile(loss="categorical_crossentropy", optimizer=Adam(1e-4))

critic = Sequential([
    Dense(16,
          activation="relu",
          input_shape=input_shape,
          kernel_initializer="he_uniform"),
    Dense(16, activation="relu", kernel_initializer="he_uniform"),
    Dense(1, activation="linear", kernel_initializer="he_uniform")
])
critic.compile(loss="mse", optimizer=Adam(5e-4))

agent = A2C(actor,
            critic,
            action_space=env.action_space,
            memory=Experience(max_length=10000),
            discount_factor_gamma=0.98,
            entropy_penalty_coef=0.01)

rollout = Rolling(agent, env, config=RolloutConfig(max_steps=300))
test_rollout = Trajectory(agent, gym.make("CartPole-v1"))

rewards = []
actor_loss = []
actor_utility = []
actor_entropy = []
critic_loss = []

for episode in range(1, 1001):
    episode_actor_loss = []
    episode_actor_utility = []
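    # ... (the rest of the training loop -- rolling the agent, fitting it and
    # appending the returned metrics to the lists above -- is truncated in this excerpt)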
Example #3
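# Complete, minimal script: builds a wide MLP actor-critic pair with the
# library's model factory and trains A2C on a reward-scaled gym environment
# through the high-level Rolling.fit() loop, then renders the trained agent.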
from trickster.agent import A2C
from trickster.rollout import Rolling, Trajectory, RolloutConfig
from trickster.model import mlp
from trickster.utility import gymic

env = gymic.rwd_scaled_env()
input_shape = env.observation_space.shape
num_actions = env.action_space.n

actor, critic = mlp.wide_pg_actor_critic(input_shape, num_actions)

agent = A2C(actor,
            critic,
            action_space=env.action_space,
            discount_factor_gamma=0.98,
            entropy_penalty_coef=0.01)

rollout = Rolling(agent, env, config=RolloutConfig(max_steps=300))
test_rollout = Trajectory(agent, gymic.rwd_scaled_env())

rollout.fit(episodes=1000,
            updates_per_episode=64,
            step_per_update=1,
            testing_rollout=test_rollout,
            plot_curves=True)
test_rollout.render(repeats=10)
Example #4
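# NOTE: this excerpt begins mid-script. The setup below is a plausible
# reconstruction added for readability: the import paths and the environment
# construction are assumptions, not part of the original (`Lunar` is the
# LunarLander wrapper used further down; its definition is not shown).
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

from trickster.agent import A2C
from trickster.rollout import Rolling, Trajectory
from trickster.utility import history

env = Lunar()
input_shape = env.observation_space.shape
num_actions = env.action_space.n

actor = Sequential([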
    Dense(400, activation="relu", input_shape=input_shape),
    Dense(300, activation="relu"),
    Dense(num_actions, activation="softmax")
])
actor.compile(loss="categorical_crossentropy", optimizer=Adam(1e-4))

critic = Sequential([
    Dense(400, activation="relu", input_shape=input_shape),
    Dense(300, activation="relu"),
    Dense(1, activation="linear")
])
critic.compile(loss="mse", optimizer=Adam(1e-4))

agent = A2C(actor,
            critic,
            action_space=num_actions,
            absolute_memory_limit=10000,
            discount_factor_gamma=0.99,
            entropy_penalty_coef=0.05)

rollout = Rolling(agent.create_workers(1)[0], env)
test_rollout = Trajectory(agent, Lunar())


def train():
    hst = history.History("reward_sum", "actor_loss", "actor_utility",
                          "actor_utility_std", "actor_entropy", "values",
                          "advantages", "critic_loss")

    for episode in range(1, 1001):

        for update in range(1, 4):
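            # ... (the body of the update loop -- rolling the agent, fitting it
            # and recording metrics into `hst` -- is truncated in this excerpt)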
Example #5
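# Parallel-rollout variant: one A2C agent gathers experience from eight
# LunarLander-v2 instances at once via MultiRolling, and is evaluated on a
# separate test environment.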
from trickster.agent import A2C
from trickster.model import mlp
from trickster.rollout import MultiRolling, Trajectory, RolloutConfig
from trickster.utility import gymic

NUM_ENVS = 8

envs = [gymic.rwd_scaled_env("LunarLander-v2") for _ in range(NUM_ENVS)]
test_env = gymic.rwd_scaled_env("LunarLander-v2")
input_shape = envs[0].observation_space.shape
num_actions = envs[0].action_space.n

actor, critic = mlp.wide_pg_actor_critic(input_shape, num_actions)

agent = A2C(actor,
            critic,
            action_space=num_actions,
            discount_factor_gamma=0.99,
            entropy_penalty_coef=0.05)

rollout = MultiRolling(agent,
                       envs,
                       rollout_configs=RolloutConfig(max_steps=300))
test_rollout = Trajectory(agent, test_env)

rollout.fit(episodes=1000,
            updates_per_episode=16,
            steps_per_update=1,
            update_batch_size=-1,
            testing_rollout=test_rollout,
            plot_curves=True)
test_rollout.render(repeats=10)
Example #6
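# NOTE: this excerpt begins mid-script. The elided part above defines the
# pixel-based environments (`envs`, `test_env`, the `MOVES` action space), the
# convolutional actor stream ending in `action_probs`, and the start of the
# critic stream (`critic_input` / `critic_stream`), along with the Keras,
# collections.deque and trickster imports used below.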
critic_stream = Conv2D(32, (4, 4), strides=(2, 2), padding="same")(critic_stream)  # 16
critic_stream = LeakyReLU()(critic_stream)
critic_stream = Conv2D(64, (4, 4), strides=(2, 2), padding="same")(critic_stream)  # 8
critic_stream = LeakyReLU()(critic_stream)
critic_stream = Conv2D(1, (1, 1), padding="valid")(critic_stream)
value_estimate = GlobalAveragePooling2D()(critic_stream)
# value_estimate = Flatten()(critic_stream)

actor = Model(actor_input, action_probs, name="Actor")
actor.compile(SGD(1e-4, momentum=0.9), "categorical_crossentropy")
critic = Model(critic_input, value_estimate, name="Critic")
critic.compile(SGD(5e-4, momentum=0.9), "mse")

agent = A2C(actor, critic,
            action_space=MOVES,
            memory=Experience(max_length=10000),
            discount_factor_gamma=0.995,
            entropy_penalty_coef=0.0,
            state_preprocessor=lambda state: state / 255.)  # scale raw pixel values to [0, 1]

episode = 1

reward_memory = deque(maxlen=10)
step_lengths = deque(maxlen=10)
critic_losses = deque(maxlen=10)
actor_losses = deque(maxlen=10)
actor_utility = deque(maxlen=10)
actor_entropy = deque(maxlen=10)

rollout = MultiRolling(agent, envs, rollout_configs=RolloutConfig(max_steps=512, skipframes=2))
test_rollout = Trajectory(agent, test_env, config=RolloutConfig(max_steps=512, skipframes=2))
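# ... (the training loop that drives `rollout`, fits the agent and fills the
# deques above is truncated in this excerpt)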
Example #7
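# Multi-agent game example. `Match` and `MatchConfig` belong to an external game
# environment package whose import sits above this excerpt and is not shown here.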
from trickster.agent import A2C
from trickster.rollout import MultiRolling, Trajectory, RolloutConfig
from trickster.model import mlp

cfg = MatchConfig(canvas_size=(100, 100),
                  players_per_side=2,
                  learning_type=MatchConfig.LEARNING_TYPE_SINGLE_AGENT,
                  observation_type=MatchConfig.OBSERVATION_TYPE_VECTOR)

envs = [Match(cfg) for _ in range(8)]
test_env = Match(cfg)

actor, critic = mlp.wide_pg_actor_critic(envs[0].observation_space.shape,
                                         envs[0].action_space.n,
                                         actor_lr=1e-4,
                                         critic_lr=1e-4)

agent = A2C(actor, critic, test_env.action_space, entropy_penalty_coef=0.1)

rcfg = RolloutConfig(max_steps=512, skipframes=2)

training_rollout = MultiRolling(agent, envs, rcfg)
testing_rollout = Trajectory(agent, test_env, rcfg)

training_rollout.fit(episodes=1000,
                     updates_per_episode=512,
                     steps_per_update=1,
                     testing_rollout=testing_rollout)
testing_rollout.render(repeats=10)