Example #1
def make_sarsa_rl_agent(processor: Processor_56x5, nbr_layers=2):
    # Build the Q-network from the processor; act greedily when evaluating.
    model = processor.create_model(nbr_layers=nbr_layers)
    test_policy = GreedyQPolicy()

    sarsa_agent = SarsaAgent(model=model,
                             nb_actions=NBR_TICHU_ACTIONS,
                             nb_steps_warmup=10,
                             gamma=0.99,
                             test_policy=test_policy,
                             processor=processor)
    sarsa_agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return sarsa_agent
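This factory relies on project-specific pieces (the Processor_56x5 class and the NBR_TICHU_ACTIONS constant) that are defined elsewhere in the repository. The sketch below lists the imports the snippet most likely needs from keras-rl and Keras; the exact import paths are assumptions and can vary between keras-rl versions.

# Probable imports for the factory above (a sketch, not taken from the source).
# Processor_56x5 and NBR_TICHU_ACTIONS are project-specific and assumed to be
# defined elsewhere in the repository.
from keras.optimizers import Adam

from rl.agents import SarsaAgent  # in some versions: from rl.agents.sarsa import SARSAAgent
from rl.policy import GreedyQPolicy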
Example #2
def test_sarsa():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    policy = EpsGreedyQPolicy(eps=.1)
    sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=50, policy=policy)
    sarsa.compile(Adam(lr=1e-3))

    sarsa.fit(env, nb_steps=20000, visualize=False, verbose=0)
    policy.eps = 0.
    h = sarsa.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
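The test above comes without its imports. A plausible set is sketched below; note that TwoRoundDeterministicRewardEnv is a debugging environment that shipped with older Gym releases, so its import path is an assumption that may not match newer installations.

# Likely imports for the test above (a sketch; paths may differ by version).
import random

import numpy as np
from numpy.testing import assert_allclose

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam

from rl.agents import SarsaAgent
from rl.policy import EpsGreedyQPolicy

# Debugging env from older Gym releases; the import path is an assumption.
from gym.envs.debugging import TwoRoundDeterministicRewardEnv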
Example #3
print('Observations: ', (1, ) + env.observation_space.shape)

# create model
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# configure agent
policy = BoltzmannQPolicy()
sarsa = SarsaAgent(model=model,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10,
                   policy=policy)
sarsa.compile(Adam(lr=1e-5), metrics=['mae'])

# run agent
history = sarsa.fit(env,
                    nb_steps=10000,
                    visualize=False,
                    verbose=1,
                    log_interval=100)
plt.plot(history.history['episode_reward'])
plt.show()
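The snippet assumes an environment env and its action count nb_actions already exist, along with the usual keras-rl and matplotlib imports. A minimal setup sketch, using CartPole-v1 purely as a placeholder since the original does not show which environment was used:

# Setup sketch for the snippet above; CartPole-v1 is a placeholder environment.
import gym
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents import SarsaAgent
from rl.policy import BoltzmannQPolicy

env = gym.make('CartPole-v1')    # placeholder; not taken from the original code
nb_actions = env.action_space.n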
Example #4
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=False)

else:
    # SARSA
    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()

    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(state_size // 2))
    model.add(Activation('relu'))
    model.add(Dense(state_size // 4))
    model.add(Activation('relu'))
    model.add(Dense(state_size // 8))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
    sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

    sarsa.fit(env, nb_steps=500000, visualize=False, verbose=1)

    # After training is done, we save the final weights.
    sarsa.save_weights('weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    sarsa.test(env, nb_episodes=5, visualize=False)
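Because the final weights are written to weights.h5f, a natural follow-up is to rebuild the same agent and restore them before evaluating. A minimal sketch, assuming model, nb_actions, policy, and env are constructed exactly as above:

# Restore the saved weights into a freshly built agent before testing
# (sketch; reuses model, nb_actions, policy and env from the snippet above).
sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])
sarsa.load_weights('weights.h5f')
sarsa.test(env, nb_episodes=5, visualize=False)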
Example #5
elif not args.shield and args.big_neg:
    shield = None
    ENV_NAME = 'Car_noshield_huge_neg'
    filename = '_0_huge_neg_avg_reward.data'
    huge_neg = True
    # pkl_name = 'car-noshield.pkl'
    save_name = "bigneg.npy"

if args.sarsa:
    filename = 'sarsa_' + filename
    # pkl_name = 'sarsa_' + pkl_name
    ENV_NAME = 'sarsa_' + ENV_NAME
    dqn = SarsaAgent(model=model,
                     nb_actions=nb_actions,
                     nb_steps_warmup=10,
                     policy=policy,
                     shield=shield,
                     preemptive=args.preemptive)

elif not args.sarsa:
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=50,
                   target_model_update=1e-2,
                   policy=policy,
                   shield=shield,
                   huge_neg=huge_neg,
                   preemptive=args.preemptive)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])
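The shield, huge_neg, and preemptive keyword arguments are not part of stock keras-rl; they belong to this project's modified agents. After compilation, training and evaluation presumably follow the usual keras-rl flow, sketched here with placeholder step and episode counts:

# Typical keras-rl training/evaluation flow (sketch; the step count, episode
# count and the np.save call are assumptions, not taken from the script).
import numpy as np

history = dqn.fit(env, nb_steps=100000, visualize=False, verbose=1)
np.save(save_name, history.history['episode_reward'])  # hypothetical use of save_name
dqn.test(env, nb_episodes=10, visualize=False)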
Example #6
elif METHOD.upper() == 'DQN':
    memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
    agent = DQNAgent(model=model,
                     nb_actions=nb_actions,
                     memory=memory,
                     nb_steps_warmup=100,
                     target_model_update=1e-2,
                     policy=train_policy,
                     test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])

elif METHOD.upper() == 'SARSA':
    # SARSA does not require a memory.
    agent = SarsaAgent(model=model,
                       nb_actions=nb_actions,
                       nb_steps_warmup=10,
                       policy=train_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])

elif METHOD.upper() == 'CEM':
    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    agent = CEMAgent(model=model,
                     nb_actions=nb_actions,
                     memory=memory,
                     batch_size=50,
                     nb_steps_warmup=2000,
                     train_interval=50,
                     elite_frac=0.05)
    agent.compile()

else: