# Train a DQN agent on `env` using the settlers network parameters, save and
# reload the trained network, then run the agent in best-action mode.
# NOTE(review): this script had been collapsed onto a single line (which Python
# parses as one comment, so nothing ran). The loop/if nesting below is
# reconstructed from the block-opening colons and the original comments -
# confirm it against the intended behaviour.

# init DQN agent
# NOTE(review): 0.2 and 0.1 are passed positionally; presumably exploration
# rates - verify against the DQNAgent constructor.
agent = libs.libs_agent.agent_dqn.DQNAgent(
    env, "networks/settlers_network/parameters.json", 0.2, 0.1)
# agent = libs_agent.agent.Agent(env)

# process training
training_iterations = 500000
for iteration in range(training_iterations):
    agent.main()

    # print training progress % and score every 100th iteration
    if iteration % 100 == 0:
        env._print()
        print(iteration * 100.0 / training_iterations, env.get_score())

# save the trained network, then load it back from the same directory
agent.save("networks/settlers_network/trained/")
agent.load("networks/settlers_network/trained/")

# reset score
env.reset_score()

# choose only the best action
agent.run_best_enable()

# process testing iterations
testing_iterations = 10000
for iteration in range(testing_iterations):
    agent.main()
    print("move=", env.get_move(), " score=", env.get_score(),
          " moves to win=", env.get_moves_to_win())
# Fragment of a DQN training script whose line breaks were lost; the statement
# that opens the enclosing loop is not part of this excerpt.
# NOTE(review): the nesting cannot be recovered from the collapsed text, so the
# statements are described in order only - confirm which of them belong to each
# `if` against the original file.
#  - render the environment;
#  - when env.get_iterations() is a multiple of 256: build one log line from
#    iterations, games count, training epsilon and score (each followed by a
#    space, then a newline), pass it to training_progress_log.put_string(), and
#    print the percentage of games done (games_count * 100 / total_games_to_play),
#    the training epsilon and the iteration count;
#  - when env.get_iterations() is a multiple of 50000: print "SAVING network"
#    and save the agent to network_path + "trained/";
#  - then save the agent to, and load it from, network_path + "trained/",
#    reset the environment score and call agent.run_best_enable().
env.render() if env.get_iterations()%256 == 0: str_progress = str(env.get_iterations()) + " " str_progress+= str(env.get_games_count()) + " " str_progress+= str(agent.get_epsilon_training()) + " " str_progress+= str(env.get_score()) + " " str_progress+= "\n" training_progress_log.put_string(str_progress) print("done = ", env.get_games_count()*100.0/total_games_to_play, "%", " eps = ", agent.get_epsilon_training(), " iterations = ", env.get_iterations()) if env.get_iterations()%50000 == 0: print("SAVING network") agent.save(network_path + "trained/") agent.save(network_path + "trained/") agent.load(network_path + "trained/") #reset score env.reset_score() #choose only the best action agent.run_best_enable()
# NOTE(review): the triple-quote delimiter at the start of the next line is
# unmatched within this excerpt. If it opens a triple-quoted block, everything
# after it (a DQN run using networks/arkanoid_network_b: 250000 training
# iterations, save/load of the trained network, then 10000 testing iterations)
# is disabled text rather than live code - confirm where the matching
# delimiter is before editing or re-enabling it.
''' #init DQN agent agent = libs.libs_agent.agent_dqn.DQNAgent( env, "networks/arkanoid_network_b/parameters.json", 0.2, 0.02, 0.99999) #process training training_iterations = 250000 for iteration in range(0, training_iterations): agent.main() #print training progress %, ane score, every 100th iterations if iteration % 100 == 0: env._print() agent.save("networks/arkanoid_network_b/trained/") agent.load("networks/arkanoid_network_b/trained/") #reset score env.reset_score() #choose only the best action agent.run_best_enable() #process testing iterations testing_iterations = 10000 for iteration in range(0, testing_iterations): agent.main() env._print()