def make_sarsa_rl_agent(processor: Processor_56x5, nbr_layers=2):
    # Build the network via the processor and wrap it in a keras-rl SarsaAgent.
    model = processor.create_model(nbr_layers=nbr_layers)
    # Greedy policy for evaluation; no training policy is passed, so the
    # agent falls back to its built-in default.
    test_policy = GreedyQPolicy()
    sarsa_agent = SarsaAgent(model=model, nb_actions=NBR_TICHU_ACTIONS,
                             nb_steps_warmup=10, gamma=0.99,
                             test_policy=test_policy, processor=processor)
    sarsa_agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return sarsa_agent
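# A minimal usage sketch for the factory above. `TichuEnv` is a hypothetical
# stand-in for the project's Gym environment; `Processor_56x5` and
# `NBR_TICHU_ACTIONS` come from the surrounding code, not from keras-rl.
env = TichuEnv()  # hypothetical environment name
agent = make_sarsa_rl_agent(Processor_56x5(), nbr_layers=2)
agent.fit(env, nb_steps=50000, visualize=False, verbose=1)
agent.test(env, nb_episodes=10, visualize=False)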
def test_sarsa():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    policy = EpsGreedyQPolicy(eps=.1)
    sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=50, policy=policy)
    sarsa.compile(Adam(lr=1e-3))
    sarsa.fit(env, nb_steps=20000, visualize=False, verbose=0)

    # Evaluate with exploration switched off.
    policy.eps = 0.
    h = sarsa.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
print('Observations: ', (1,) + env.observation_space.shape)

# create model
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# configure agent (named `sarsa` rather than `dqn`, since it is a SarsaAgent)
policy = BoltzmannQPolicy()
sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
sarsa.compile(Adam(lr=1e-5), metrics=['mae'])

# run agent and plot the per-episode reward curve
history = sarsa.fit(env, nb_steps=10000, visualize=False, verbose=1, log_interval=100)
plt.plot(history.history['episode_reward'])
plt.show()
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=False)
else:  # SARSA
    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    # Dense layer sizes must be integers, hence the floor division.
    model.add(Dense(state_size // 2))
    model.add(Activation('relu'))
    model.add(Dense(state_size // 4))
    model.add(Activation('relu'))
    model.add(Dense(state_size // 8))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
    sarsa.compile(Adam(lr=1e-3), metrics=['mae'])
    sarsa.fit(env, nb_steps=500000, visualize=False, verbose=1)

    # After training is done, we save the final weights.
    sarsa.save_weights('weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    sarsa.test(env, nb_episodes=5, visualize=False)
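    # For later evaluation without retraining, the saved weights can be
    # restored into an identically built and compiled agent; a sketch using
    # keras-rl's standard load_weights call (filename as saved above):
    sarsa.load_weights('weights.h5f')
    sarsa.test(env, nb_episodes=5, visualize=False)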
elif not args.shield and args.big_neg:
    shield = None
    ENV_NAME = 'Car_noshield_huge_neg'
    filename = '_0_huge_neg_avg_reward.data'
    huge_neg = True
    # pkl_name = 'car-noshield.pkl'
    save_name = "bigneg.npy"

if args.sarsa:
    filename = 'sarsa_' + filename
    # pkl_name = 'sarsa_' + pkl_name
    ENV_NAME = 'sarsa_' + ENV_NAME
    # `preemtive` follows the (misspelled) keyword of this project's custom SarsaAgent.
    dqn = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
                     policy=policy, shield=shield, preemtive=args.preemtive)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=50, target_model_update=1e-2, policy=policy,
                   shield=shield, huge_neg=huge_neg, preemptive=args.preemptive)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
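# A sketch of the training and evaluation calls that typically follow the
# dispatch above, assuming the script has already built `env`; the step
# count and weights filename are illustrative, not taken from the source.
dqn.fit(env, nb_steps=100000, visualize=False, verbose=2)
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
dqn.test(env, nb_episodes=5, visualize=False)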
elif METHOD.upper() == 'DQN':
    memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                     nb_steps_warmup=100, target_model_update=1e-2,
                     policy=train_policy, test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'SARSA':
    # SARSA does not require a memory.
    agent = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
                       policy=train_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'CEM':
    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                     batch_size=50, nb_steps_warmup=2000, train_interval=50,
                     elite_frac=0.05)
    agent.compile()
else:
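# Once a branch above has constructed and compiled `agent`, the common
# keras-rl loop can run it; a sketch assuming `env` and `NUM_STEPS` are
# defined elsewhere in the script (the episode count is illustrative):
history = agent.fit(env, nb_steps=NUM_STEPS, visualize=False, verbose=1)
agent.test(env, nb_episodes=5, visualize=False)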