def test_double_dqn():
    """Double-DQN should solve the deterministic two-round toy task.

    The optimal policy earns an episode reward of exactly 3, so after
    training the greedy mean reward over 20 episodes must equal 3.
    """
    env = TwoRoundDeterministicRewardEnv()

    # Pin every RNG so the training run is reproducible.
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Tiny network: a single 16-unit hidden layer suffices for this task.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=50,
                   target_model_update=1e-1,
                   policy=policy,
                   enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)

    # Switch off exploration and evaluate the learned greedy policy.
    policy.eps = 0.
    history = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(history.history['episode_reward']), 3.)
def test_sarsa():
    """SARSA should solve the deterministic two-round toy task.

    After 20000 training steps the greedy policy must achieve the
    optimal mean episode reward of 3 over 20 evaluation episodes.
    """
    env = TwoRoundDeterministicRewardEnv()

    # Pin every RNG so the training run is reproducible.
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Tiny network: a single 16-unit hidden layer suffices for this task.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    policy = EpsGreedyQPolicy(eps=.1)
    sarsa = SARSAAgent(model=model,
                       nb_actions=nb_actions,
                       nb_steps_warmup=50,
                       policy=policy)
    sarsa.compile(Adam(lr=1e-3))

    sarsa.fit(env, nb_steps=20000, visualize=False, verbose=0)

    # Switch off exploration and evaluate the learned greedy policy.
    policy.eps = 0.
    history = sarsa.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(history.history['episode_reward']), 3.)
def test_sarsa_agent_alias():
    """Same SARSA check as ``test_sarsa``, via the ``SarsaAgent`` alias.

    Renamed from ``test_sarsa``: the original duplicated the name of the
    preceding test, so the later definition silently shadowed the earlier
    one and pytest only ever collected/ran a single copy.  With a unique
    name both the ``SARSAAgent`` and ``SarsaAgent`` spellings are tested.
    """
    env = TwoRoundDeterministicRewardEnv()

    # Pin every RNG so the training run is reproducible.
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Tiny network: a single 16-unit hidden layer suffices for this task.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    policy = EpsGreedyQPolicy(eps=.1)
    # NOTE: exercises the ``SarsaAgent`` alias rather than ``SARSAAgent``.
    sarsa = SarsaAgent(model=model,
                       nb_actions=nb_actions,
                       nb_steps_warmup=50,
                       policy=policy)
    sarsa.compile(Adam(lr=1e-3))

    sarsa.fit(env, nb_steps=20000, visualize=False, verbose=0)

    # Switch off exploration and evaluate the learned greedy policy.
    policy.eps = 0.
    history = sarsa.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(history.history['episode_reward']), 3.)
def test_duel_dqn():
    """Dueling DQN (without double-DQN) should solve the two-round task.

    The optimal policy earns an episode reward of exactly 3, so after
    training the greedy mean reward over 20 episodes must equal 3.
    """
    env = TwoRoundDeterministicRewardEnv()

    # Pin every RNG so the training run is reproducible.
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Tiny network: a single 16-unit hidden layer suffices for this task.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=50,
                   target_model_update=1e-1,
                   policy=policy,
                   enable_double_dqn=False,
                   enable_dueling_network=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)

    # Switch off exploration and evaluate the learned greedy policy.
    policy.eps = 0.
    history = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(history.history['episode_reward']), 3.)
policy=policy, nb_actions=nb_actions, memory=memory, nb_steps_warmup=400, target_model_update=1e-1, enable_double_dqn=True, enable_dueling_network=True) dqn.compile(Adam(lr=1e-3), metrics=['mae']) outputFile = open("2014.csv", "w+") outputFile.write( "iteration,trainAccuracy,trainCoverage,trainReward,validationAccuracy,validationCoverage,validationReward\n" ) iteration = 0 policy.eps = 0.1 for i in range(0, 100): dqn.fit(trainEnv, nb_steps=3000, visualize=False, callbacks=[trainer], verbose=0) (episodes, trainCoverage, trainAccuracy, trainReward) = trainer.getInfo() dqn.test(validationEnv, nb_episodes=300, verbose=0, callbacks=[validator], visualize=False) (episodes, validCoverage, validAccuracy, validReward) = validator.getInfo() outputFile.write( str(iteration) + "," + str(trainAccuracy) + "," + str(trainCoverage) +
enable_double_dqn=False, memory=memory, policy=policy) dqn.compile(Adam(lr=1e-3), metrics=['mae']) #dqn.load_weights("Q.weights") bot.send_message(chat_id=telegramChatID, text="Experiment started - " + datetime.datetime.now().strftime("%H:%M")) percIncrement = 100 / epochs perc = 0 for i in range(epochs): policy.eps = 1 dqn.fit(trainEnv, nb_steps=10000, visualize=False, verbose=0) dqn.test(testEnv, nb_episodes=20, verbose=0, visualize=False) perc += percIncrement try: bot.send_message(chat_id=telegramChatID, text=str(perc) + " % - " + datetime.datetime.now().strftime("%H:%M")) except: print(str(perc) + " % - " + datetime.datetime.now().strftime("%H:%M")) trainEnv.changeOutput("walks/train/walk" + str(i + 1) + ".csv") testEnv.changeOutput("walks/test/walk" + str(i + 1) + ".csv") dqn.save_weights("Q.weights", overwrite=True) bot.send_message(chat_id=telegramChatID,