import json

# Agent, train and agent_exists live in this project's q_learning module (see the
# final snippet); Game, play_game and load_test are assumed to come from the
# project's game and persistence helpers.
from q_learning import Agent, agent_exists, train


# NB: the dict default argument is shared between calls; that is safe here because
# the function only rebinds training_params, never mutates it.
def test(agents_nb, continued=False, erase=True, steps_nb=100, probability=0.33,
         test_duration=400, alpha=0.3, gamma=0.8, epsilon=0.8,
         training_params={'cycle_nb': 1, 'prob_step': 10, 'game_duration': 20}):
    agent_list = [Agent() for i in range(agents_nb)]
    old_results = []
    if continued and agent_exists("stat2d"+str(agents_nb)):
        training_params, probability, agents_nb, old_results, agent_list = load_test("stat2d"+str(agents_nb), agent_list)
    # one row of hit counts per test step, one column per agent, appended after any loaded history
    hits = old_results + [[0 for i in range(agents_nb)] for j in range(steps_nb)]
    
    game = Game(probability, 5)

    for agent_id in range(agents_nb):
        agent = agent_list[agent_id]
        for step in range(steps_nb):
            game.reset()
            # evaluate the current agent, then train it further before the next step
            hits[len(hits) - steps_nb + step][agent_id] = play_game(agent, game, test_duration)

            agent = train(agent=agent, training_params=training_params, learn_rate=alpha,
                          discount_rate=gamma, policy=epsilon, show_prints=False)

        print("Agent", agent_id + 1)

    if erase:
        # serialise every agent's learned action values (keys stringified for JSON)
        data = {}
        for i in range(agents_nb):
            data["data" + str(i)] = {str(k): v for k, v in agent_list[i].actions_value.items()}
        with open("saves/stat2d" + str(agents_nb) + ".json", "w") as file:
            json.dump({
                'data': data,
                'options': {
                    'training_params': training_params,
                    'test_params': {
                        'probability': probability,
                        'agents_nb': agents_nb,
                    }
                },
                'old_results': hits
            }, file)
    return hits
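
# Minimal usage sketch (hypothetical values): evaluate ten agents for fifty
# train/test steps each; `hits` holds one row per step and one column per agent.
# hits = test(10, steps_nb=50)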
import gym
import monte_carlo
import q_learning
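
# "FrozenLakeEasy-v0" is not a built-in gym environment and must be registered
# before gym.make can resolve it. A minimal sketch, assuming the usual
# non-slippery FrozenLake variant:
from gym.envs.registration import register

register(id="FrozenLakeEasy-v0",
         entry_point="gym.envs.toy_text:FrozenLakeEnv",
         kwargs={"is_slippery": False})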

if __name__ == '__main__':
    env = gym.make("FrozenLakeEasy-v0")
    monte_carlo.train(env=env)
    q_learning.train(env=env)
Example #3
import numpy as np
import matplotlib.pyplot as plt

# Agent, Game, play_game and train are assumed to come from the same project
# modules as in the first snippet. The parameters below are not defined in the
# snippet itself; these values are plausible assumptions chosen so that the
# arrays and the plot axes line up.
cycle_nb = 10            # training cycles per agent
cycles_per_cycle = 1     # cycles performed by each train() call
prob_step = 10           # number of shoot probabilities sampled in [0, 1)

hits = np.zeros((cycle_nb, prob_step))             # summed hits, averaged below
max_hits = np.zeros((cycle_nb, prob_step))         # best single run per cell
min_hits = np.full((cycle_nb, prob_step), np.inf)  # worst single run per cell

agent_list = [Agent() for i in range(10)]
game = Game(0, 5)
for agent_id in range(len(agent_list)):
    agent = agent_list[agent_id]
    for cycle in range(cycle_nb):
        for i in range(prob_step):
            game.reset()
            game.probability = float(i) / prob_step
            nb_hits = play_game(agent, game, 100)
            hits[cycle, i] += nb_hits
            max_hits[cycle, i] = max(max_hits[cycle, i], nb_hits)
            min_hits[cycle, i] = min(min_hits[cycle, i], nb_hits)
        agent = train(agent=agent,
                      training_params={
                          'cycle_nb': cycles_per_cycle,
                          'prob_step': 10,
                          'game_duration': 10
                      },
                      show_prints=False)
        print("Cycle", cycle + 1)

# average the summed hits over all agents
hits /= len(agent_list)

ax = plt.figure().add_subplot(111, projection="3d")

# one grid point per (probability, cycle) cell; the 1 / prob_step spacing matches
# the probabilities sampled in the loop above
X, Y = np.meshgrid(np.arange(0, 1, 1 / prob_step),
                   np.arange(0, cycle_nb * cycles_per_cycle, cycles_per_cycle))
ax.plot_surface(X, Y, hits, rstride=1, cstride=1, cmap='summer')

ax.set_xlabel("Shoot probability")
ax.set_ylabel("Cycles")
ax.set_zlabel("Average hits")
plt.show()
from q_learning import load_agent, agent_exists, Agent, train
from ui import start_simulation

# reuse a previously trained agent when a save exists, otherwise start fresh
if agent_exists('save_file'):
    agent, _ = load_agent('save_file')
else:
    agent = Agent()

want_to_train = input('Train ? (True/False) ')
if want_to_train == 'True':
    agent = train(agent, 'save_file')


def choose_action(state):
    # agent is only read here, so no global declaration is needed
    return agent.choose_best_action(state)


start_simulation(choose_action)
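
# The deployed policy is purely greedy: choose_best_action presumably takes the
# argmax over the agent's stored action values (the actions_value table saved in
# the first snippet), with no exploration once training is done.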