Ejemplo n.º 1
0
# Create the DQN agent for the settlers network.
# NOTE(review): the trailing 0.2 and 0.1 are presumably exploration settings;
# confirm their meaning against DQNAgent's constructor.
agent = libs.libs_agent.agent_dqn.DQNAgent(
    env,
    "networks/settlers_network/parameters.json",
    0.2,
    0.1,
)
# Alternative agent, currently disabled:
#agent = libs_agent.agent.Agent(env)

# ---- training phase ----
training_iterations = 500000

for step in range(training_iterations):
    agent.main()
    if step % 100 != 0:
        continue
    # Every 100th iteration: show the environment, then print the training
    # progress in percent together with the current score.
    env._print()
    print(100.0 * step / training_iterations, env.get_score())

# Save the trained network, then load it back from the same directory.
trained_dir = "networks/settlers_network/trained/"
agent.save(trained_dir)
agent.load(trained_dir)

# Start the testing phase from a clean score.
env.reset_score()

# From here on the agent chooses only the best action.
agent.run_best_enable()

# ---- testing phase ----
testing_iterations = 10000
for step in range(testing_iterations):
    agent.main()
    print(
        "move=", env.get_move(),
        " score=", env.get_score(),
        " moves to win=", env.get_moves_to_win(),
    )
Ejemplo n.º 2
0
            env.render()


    # Every 256 environment iterations: append one line to the training log
    # (iterations, games count, training epsilon, score) and report progress
    # on stdout.
    if env.get_iterations() % 256 == 0:
        progress_fields = [
            str(env.get_iterations()),
            str(env.get_games_count()),
            str(agent.get_epsilon_training()),
            str(env.get_score()),
        ]
        # Each field is followed by a single space; the line ends with "\n".
        str_progress = " ".join(progress_fields) + " \n"
        training_progress_log.put_string(str_progress)

        print(
            "done = ", env.get_games_count() * 100.0 / total_games_to_play, "%",
            " eps = ", agent.get_epsilon_training(),
            " iterations = ", env.get_iterations(),
        )

    # Checkpoint the network every 50000 environment iterations.
    if env.get_iterations() % 50000 == 0:
        print("SAVING network")
        agent.save(network_path + "trained/")

# Persist the final network, then load it back from the same directory.
trained_dir = network_path + "trained/"
agent.save(trained_dir)
agent.load(trained_dir)

# Clear the score accumulated so far.
env.reset_score()

# Make the agent choose only the best action from now on.
agent.run_best_enable()

Ejemplo n.º 3
0
'''

# Create the DQN agent for the arkanoid network.
# NOTE(review): 0.2, 0.02 and 0.99999 are presumably exploration settings;
# confirm their meaning against DQNAgent's constructor.
agent = libs.libs_agent.agent_dqn.DQNAgent(
    env,
    "networks/arkanoid_network_b/parameters.json",
    0.2,
    0.02,
    0.99999,
)

# ---- training phase ----
training_iterations = 250000

for step in range(training_iterations):
    agent.main()
    # Every 100th iteration, let the environment print its state.
    if step % 100 == 0:
        env._print()

# Save the trained network, then load it back from the same directory.
trained_dir = "networks/arkanoid_network_b/trained/"
agent.save(trained_dir)
agent.load(trained_dir)

# Start the testing phase from a clean score.
env.reset_score()

# From here on the agent chooses only the best action.
agent.run_best_enable()

# ---- testing phase ----
testing_iterations = 10000
for step in range(testing_iterations):
    agent.main()
    env._print()