print('TPG Gen: ' + str(trainer.populations[envName].curGen)) for envName in allEnvNames: # train on each env print('Playing Game: ' + envName) scoreList = man.list() # run all agents on env pool.map(runAgent, [(agent, envName, scoreList, 3, 18000, None) for agent in trainer.getAllAgents( skipTasks=[envName], noRef=True, popName=envName)]) trainer.applyScores(scoreList, popName=envName) # report curEnv results to log scoreStats = trainer.getTaskScores(envName, popName=envName) bestTeam = trainer.getBestTeams(tasks=[envName], popName=envName)[0] with open(logName, 'a') as f: f.write( str(trainer.populations[envName].curGen) + ',' + str((time.time() - tstart) / 3600) + ',' + envName + ',' + str(scoreStats['min']) + ',' + str(scoreStats['max']) + ',' + str(scoreStats['average']) + ',' + str(len(bestTeam.getRootTeamGraph()[0])) + ',' + str(bestTeam.uid) + '\n') # do evolution on each env played trainer.evolve(fitMethod='single', tasks=[envName], elitistTasks=[envName], popName=envName)
def ggpTrainAllAtOnce():
    """Train one shared TPG population across all games at once.

    Runs forever. Each generation: pick the set of games used for fitness
    (either all games, or a rotating window of ``options.fitGamesNum`` of
    them), score every agent on each of those games via the worker pool,
    evolve on the combined multi-task fitness, append per-game and
    per-generation stats to the CSV logs, checkpoint the trainer with
    pickle, and periodically dump per-team outcomes and run a champion
    evaluation.

    Relies on module-level state: ``options``, ``pool``, ``man``,
    ``allEnvNames``, ``runAgent``, ``tstart``, the log-file names, and
    ``champEval``.
    """
    # Fresh trainer over the full 18-action space.
    trainer = TpgTrainer(actions=range(18), teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize, maxProgramSize=128)

    sorted_env_names = sorted(list(allEnvNames))  # stable order for champ eval
    fitness_games = []  # games contributing to fitness this generation

    while True:  # train indefinitely
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        if 0 < options.fitGamesNum < options.numGames:
            # Rotate one fresh game into the fitness window each generation,
            # evicting the oldest once the window is full.
            unused = set(allEnvNames) - set(fitness_games)
            fitness_games.append(random.choice(list(unused)))
            if len(fitness_games) > options.fitGamesNum:
                fitness_games.pop(0)
        else:
            fitness_games = list(allEnvNames)

        for game in fitness_games:
            print('Playing Game: ' + game)
            results = man.list()  # shared list the workers append scores to
            agents = trainer.getAllAgents(skipTasks=[game], noRef=True)
            pool.map(runAgent,
                     [(agent, game, results, options.trainEps,
                       options.trainFrames, None) for agent in agents])
            trainer.applyScores(results)

            # Append this game's score stats for the generation to the log.
            stats = trainer.getTaskScores(game)
            champ = trainer.getBestTeams(tasks=[game])[0]
            hours = (time.time() - tstart) / 3600
            row = [trainer.populations[None].curGen, hours, game,
                   stats['min'], stats['max'], stats['average'],
                   len(champ.getRootTeamGraph()[0]), champ.uid]
            with open(logFileGameScoresName, 'a') as log:
                log.write(','.join(str(field) for field in row) + '\n')

        # Evolve once all fitness games have been played: half the weight on
        # the combined task, the rest split evenly over the single games,
        # aggregated with 'min' fitness; elitism keeps a best per game.
        trainer.multiEvolve(
            tasks=[fitness_games] + [[g] for g in fitness_games],
            weights=[0.5] + [0.5 / len(fitness_games) for _ in fitness_games],
            fitMethod='min', elitistTasks=allEnvNames)

        # Log generation-level fitness stats for the evolved population.
        champ = trainer.getBestTeams(tasks=fitness_games)[0]
        pop = trainer.populations[None]
        hours = (time.time() - tstart) / 3600
        row = [pop.curGen, hours, '/'.join(sorted(list(fitness_games))),
               pop.scoreStats['min'], pop.scoreStats['max'],
               pop.scoreStats['average'],
               len(champ.getRootTeamGraph()[0]), champ.uid,
               len(pop.teams), len(pop.rootTeams)]
        with open(logFileFitnessName, 'a') as log:
            log.write(','.join(str(field) for field in row) + '\n')

        # Checkpoint the whole trainer every generation.
        with open(trainerFileName, 'wb') as checkpoint:
            pickle.dump(trainer, checkpoint)

        # Every 10 generations, dump every root team's outcome on every game.
        if trainer.populations[None].curGen % 10 == 0:
            for root_team in trainer.populations[None].rootTeams:
                hours = (time.time() - tstart) / 3600
                fields = [str(trainer.populations[None].curGen), str(hours),
                          str(root_team.uid)]
                # '-999999' marks games this team has no recorded outcome for.
                fields += [str(root_team.outcomes.get(name, '-999999'))
                           for name in allEnvNames]
                with open(logFileOutcomesName, 'a') as log:
                    log.write(','.join(fields) + '\n')

        # Every champEvalGen generations, evaluate top agents on all games.
        if trainer.populations[None].curGen % options.champEvalGen == 0:
            champEval(sorted_env_names, trainer, logFileChampionsName, pool,
                      man, tstart, frames=options.testFrames,
                      eps=options.testEps)