def test(agents_nb, continued=False, erase=True, steps_nb=100, probability=0.33,
         test_duration=400, alpha=0.3, gamma=0.8, epsilon=0.8, training_params=None):
    """Measure how an agent population's hit count evolves over training.

    Creates ``agents_nb`` fresh agents (or resumes a saved run when
    ``continued`` is true and a matching save exists), then alternates
    evaluation (``play_game``) and training (``train``) for ``steps_nb``
    steps per agent.

    Parameters:
        agents_nb: number of agents to evaluate.
        continued: resume from ``saves/stat2d<agents_nb>.json`` if it exists.
        erase: when true, overwrite the save file with the new results.
        steps_nb: evaluation/training steps per agent.
        probability: initial game shoot probability (overridden on resume).
        test_duration: game length used for each evaluation.
        alpha, gamma, epsilon: learning rate, discount rate and policy
            parameter forwarded to ``train``.
        training_params: dict forwarded to ``train``; defaults to
            ``{'cycle_nb': 1, 'prob_step': 10, 'game_duration': 20}``.

    Returns:
        hits: list of rows (one per step, old results first), each row holding
        one hit count per agent.
    """
    # Build the default here instead of in the signature: a mutable default
    # argument would be shared and mutated across calls.
    if training_params is None:
        training_params = {'cycle_nb': 1, 'prob_step': 10, 'game_duration': 20}

    save_name = "stat2d" + str(agents_nb)
    agent_list = [Agent() for _ in range(agents_nb)]
    old_results = []
    if continued and agent_exists(save_name):
        # Resuming replaces parameters and agents with the saved state.
        training_params, probability, agents_nb, old_results, agent_list = \
            load_test(save_name, agent_list)

    # Previous results come first; new rows are filled in below.
    hits = old_results + [[0 for _ in range(agents_nb)] for _ in range(steps_nb)]
    game = Game(probability, 5)
    for agent_id in range(agents_nb):
        agent = agent_list[agent_id]
        for step in range(steps_nb):
            game.reset()
            # Offset past the resumed rows so new results land at the end.
            hits[len(hits) - steps_nb + step][agent_id] = \
                play_game(agent, game, test_duration)
            agent = train(agent=agent, training_params=training_params,
                          learn_rate=alpha, discount_rate=gamma,
                          policy=epsilon, show_prints=False)
        print("Agent", agent_id + 1)

    if erase:
        # Serialize every agent's action-value table with stringified keys
        # so the structure is JSON-compatible.
        data = {}
        for i in range(agents_nb):
            data["data" + str(i)] = {str(k): v
                                     for k, v in agent_list[i].actions_value.items()}
        # Context manager guarantees the file is closed; the original also
        # reopened the file read-only at the end and leaked that handle.
        with open("saves/" + save_name + ".json", "w") as file:
            json.dump({
                'data': data,
                'options': {
                    'training_params': training_params,
                    'test_params': {
                        'probability': probability,
                        'agents_nb': agents_nb,
                    }
                },
                'old_results': hits
            }, file)
    return hits
import gym

import monte_carlo
import q_learning


def main():
    """Train both learners on the easy FrozenLake environment."""
    env = gym.make("FrozenLakeEasy-v0")
    monte_carlo.train(env=env)
    q_learning.train(env=env)


if __name__ == '__main__':
    main()
# Accumulate hit statistics for a population of 10 fresh agents while the
# game's shoot probability sweeps from 0 toward 1, retraining each agent
# between measurement cycles, then plot the averaged results as a 3-D surface.
#
# NOTE(review): cycle_nb, prob_step, cycles_per_cycle, hits, max_hits and
# min_hits are not defined in this chunk — they appear to be module-level
# values set earlier in the file (hits/max_hits/min_hits are indexed like 2-D
# numpy arrays of shape (cycle_nb, prob_step)); confirm against the
# surrounding code.
agent_list = [Agent() for i in range(10)]
game = Game(0, 5)
for agent_id in range(len(agent_list)):
    agent = agent_list[agent_id]
    for cycle in range(cycle_nb):
        for i in range(prob_step):
            game.reset()
            # Sweep the shoot probability in prob_step equal increments.
            game.probability = float(i) / prob_step
            nb_hits = play_game(agent, game, 100)
            # Totals here; divided by the population size after the loops.
            hits[cycle, i] += nb_hits
            max_hits[cycle, i] = max(max_hits[cycle, i], nb_hits)
            min_hits[cycle, i] = min(min_hits[cycle, i], nb_hits)
        # Retrain between cycles so later cycles reflect more training.
        agent = train(agent=agent, training_params={
            'cycle_nb': cycles_per_cycle,
            'prob_step': 10,
            'game_duration': 10
        }, show_prints=False)
        print("Cycle", cycle + 1)
# Convert accumulated totals into per-agent averages.
hits /= len(agent_list)
# Surface plot: X = shoot probability, Y = cumulative training cycles,
# Z = average hits.
ax = plt.figure().add_subplot(111, projection="3d")
# NOTE(review): the X step prob_step / 100 only matches the 1/prob_step
# sampling above when prob_step == 10 — verify for other values.
X, Y = np.meshgrid(np.arange(0, 1, prob_step / 100),
                   np.arange(0, cycle_nb * cycles_per_cycle, cycles_per_cycle))
ax.plot_surface(X, Y, hits, rstride=1, cstride=1, cmap='summer')
ax.set_xlabel("Shoot probability")
ax.set_ylabel("Cycles")
from q_learning import load_agent, agent_exists, Agent, train
from ui import start_simulation

# Resume the saved agent when a save exists; otherwise start from scratch.
agent = load_agent('save_file')[0] if agent_exists('save_file') else Agent()

# Optionally (re)train the agent before launching the simulation.
want_to_train = input('Train ? (True/False) ')
if want_to_train == "True":
    agent = train(agent, 'save_file')


def choose_action(state):
    """Return the agent's best known action for *state* (greedy policy)."""
    global agent
    return agent.choose_best_action(state)


start_simulation(choose_action)