def main():
    """Train a TPG agent population and visualize its learning progress.

    Actions are the agent's predictions about the right box:
        1 -> right box will fall off the edge
        0 -> right box will not fall off

    Evolves the population for GENERATIONS generations (scoring each agent
    over EVALUATIONS episodes per generation), replays the best agent
    FINAL_RUNS times with graphics enabled, then plots the per-generation
    min/max/avg scores with matplotlib.
    """
    # initiates trainer; two discrete actions (see docstring)
    trainer = TpgTrainer(actions=[0, 1], teamPopSize=50)

    _min, _max, _avg = [], [], []  # hold values for every generation

    for gen in range(GENERATIONS):  # generation loop
        print("Generation: ", gen + 1, "/", GENERATIONS)
        curScores = []  # new list per gen

        while True:  # loop to go through agents
            agent = trainer.getNextAgent()
            if agent is None:
                break  # no more agents, so proceed to next gen

            # evaluating the agent: total score across EVALUATIONS episodes
            score = 0
            for i in range(EVALUATIONS):
                score += evaluateAgent(agent)
            agent.reward(score)
            curScores.append(score)

        # compute each aggregate once (originally recomputed for the
        # print AND for the history lists)
        gen_min = min(curScores)
        gen_max = max(curScores)
        gen_avg = sum(curScores) / len(curScores)
        print("Min:", gen_min, " Max:", gen_max, " Avg:", gen_avg,
              "(out of " + str(EVALUATIONS) + ")\n")
        _min.append(gen_min)
        _max.append(gen_max)
        _avg.append(gen_avg)

        trainer.evolve()

    # getting best agent after all the generations
    best_agent, best_score = getBestAgent(trainer)
    print("Best agent's score:", best_score, "/", EVALUATIONS)
    for run in range(FINAL_RUNS):
        print("Final run: ", run + 1, "/", FINAL_RUNS, end='\r')
        evaluateAgent(best_agent, graphics=True)

    # plotting progress over the generations
    generations = range(1, GENERATIONS + 1)
    axes = plt.gca()
    axes.set_ylim([0, EVALUATIONS])  # scores are bounded by EVALUATIONS
    plt.plot(generations, _min, label="min")
    plt.plot(generations, _max, label="max")
    plt.plot(generations, _avg, label="avg")
    plt.xlabel("generation")
    plt.ylabel("score")
    plt.legend()
    plt.show()
for gen in range(100): # generation loop curScores = [] # new list per gen # get right env in envQueue game = gameQueue.pop() # take out last game print('playing on', game) env = envs[game] # re-get games list if len(gameQueue) == 0: gameQueue = list(allGames) random.shuffle(gameQueue) while True: # loop to go through agents teamNum = trainer.remainingAgents() agent = trainer.getNextAgent() if agent is None: break # no more agents, so proceed to next gen # check if agent already has score if agent.taskDone(): score = agent.getOutcome() else: state = env.reset() # get initial state and prep environment score = 0 valActs = range(env.action_space.n) for i in range(1000): act = agent.act(getState(state), valActs=valActs) # get action from agent