def main():
    """Train a TPG population on the box-fall prediction task, replay the
    best agent with graphics, and plot per-generation score statistics.

    Relies on module-level names: TpgTrainer, evaluateAgent, getBestAgent,
    plt, and the constants GENERATIONS, EVALUATIONS, FINAL_RUNS.
    """
    # initiates trainer, actions are these predictions of the agent:
    #   action 1 -> right box will fall off the edge
    #   action 0 -> right box will not fall off
    trainer = TpgTrainer(actions=[0, 1], teamPopSize=50)

    _min, _max, _avg = [], [], []  # hold values for every generation

    for gen in range(GENERATIONS):  # generation loop
        print("Generation: ", gen + 1, "/", GENERATIONS)
        curScores = []  # new list per gen
        while True:  # loop to go through agents
            agent = trainer.getNextAgent()
            if agent is None:
                break  # no more agents, so proceed to next gen
            # evaluating the agent: score is the sum over EVALUATIONS runs
            score = 0
            for _ in range(EVALUATIONS):
                score += evaluateAgent(agent)
            agent.reward(score)
            curScores.append(score)

        # compute each statistic once instead of re-scanning curScores
        # separately for the print and for each append
        genMin = min(curScores)
        genMax = max(curScores)
        genAvg = sum(curScores) / len(curScores)
        print("Min:", genMin, " Max:", genMax, " Avg:", genAvg,
              "(out of " + str(EVALUATIONS) + ")\n")
        _min.append(genMin)
        _max.append(genMax)
        _avg.append(genAvg)
        trainer.evolve()

    # getting best agent after all the generations
    best_agent, best_score = getBestAgent(trainer)
    print("Best agent's score:", best_score, "/", EVALUATIONS)
    for run in range(FINAL_RUNS):
        print("Final run: ", run + 1, "/", FINAL_RUNS, end='\r')
        evaluateAgent(best_agent, graphics=True)  # replay with rendering on

    # plotting progress over the generations
    generations = range(1, GENERATIONS + 1)
    axes = plt.gca()
    axes.set_ylim([0, EVALUATIONS])  # score is bounded by evaluation count
    plt.plot(generations, _min, label="min")
    plt.plot(generations, _max, label="max")
    plt.plot(generations, _avg, label="avg")
    plt.xlabel("generation")
    plt.ylabel("score")
    plt.legend()
    plt.show()
def runTpg():
    """Train a TPG population on options.envName until options.generations
    generations have elapsed, appending one fitness-summary CSV row per
    generation to logFileTpg.

    Uses module-level names: options, gym, man, pool, runAgent, TpgTrainer,
    logFileTpg, tstart.
    """
    # probe the env once, only to learn the size of its action space
    probe_env = gym.make(options.envName)
    trainer = TpgTrainer(actions=range(probe_env.action_space.n),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)
    probe_env.close()

    print('Playing Game: ' + options.envName)

    # loop generations until the configured cap is reached
    while trainer.populations[None].curGen < options.generations:
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        # manager-backed list so pool workers can report scores back
        shared_scores = man.list()
        work_items = [(agent, options.envName, shared_scores,
                       options.trainEps, options.trainFrames, None)
                      for agent in trainer.getAllAgents(
                          skipTasks=[options.envName], noRef=True)]
        pool.map(runAgent, work_items)  # evaluate every agent on the env
        trainer.applyScores(shared_scores)

        # evolve once all agents of this generation have been scored
        trainer.evolve(tasks=[options.envName],
                       elitistTasks=[options.envName])

        # append this generation's fitness summary to the log
        champion = trainer.getBestTeams(tasks=[options.envName])[0]
        fields = [
            str(trainer.populations[None].curGen),
            str((time.time() - tstart) / 3600),
            options.envName,
            str(trainer.populations[None].scoreStats['min']),
            str(trainer.populations[None].scoreStats['max']),
            str(trainer.populations[None].scoreStats['average']),
            str(len(champion.getRootTeamGraph()[0])),
            str(champion.uid),
            str(len(trainer.populations[None].teams)),
            str(len(trainer.populations[None].rootTeams)),
        ]
        with open(logFileTpg, 'a') as log:
            log.write(','.join(fields) + '\n')
# --- script setup: TPG training with a multiprocessing worker pool ---

tStart = time.time()  # wall-clock start, for elapsed-time reporting

processes = 2  # how many to run concurrently

m = mp.Manager()  # shared-memory manager for cross-process queues

# candidate GVGAI games (lvl0/lvl1 pairs).
# NOTE(review): 'gvgai-testgame2-lvl0-v0' appears twice; the second entry was
# probably meant to be 'gvgai-testgame2-lvl1-v0'. Harmless here because the
# whole list is overwritten by the next statement, but worth confirming.
allGames = [
    'gvgai-testgame1-lvl0-v0', 'gvgai-testgame1-lvl1-v0',
    'gvgai-testgame2-lvl0-v0', 'gvgai-testgame2-lvl0-v0',
    'gvgai-testgame3-lvl0-v0', 'gvgai-testgame3-lvl1-v0'
]
allGames = ['Assault-v0']  # override: train on a single Atari game instead

gameQueue = list(allGames)
random.shuffle(gameQueue)  # play the games in random order

trainer = TpgTrainer(actions=range(6), teamPopSizeInit=360)

# limit_cpu (defined elsewhere in the file) runs in each worker at startup
pool = mp.Pool(processes=processes, initializer=limit_cpu)

summaryScores = []  # record score summaries for each gen (min, max, avg)

for gen in range(100):  # generation loop
    scoreQueue = m.Queue()  # hold agents when finish, to actually apply score
    # get right env in envQueue
    game = gameQueue.pop()  # take out last game
    print('playing on', game)
    # re-get games list once every queued game has been played
    if len(gameQueue) == 0:
        gameQueue = list(allGames)
        random.shuffle(gameQueue)
    # NOTE(review): this chunk begins inside a list literal whose opening
    # "allGames = [" lies above the visible region; also,
    # 'gvgai-testgame2-lvl0-v0' appears twice — the second entry was probably
    # meant to be 'gvgai-testgame2-lvl1-v0' (moot while the override below
    # stands).
    'gvgai-testgame1-lvl0-v0', 'gvgai-testgame1-lvl1-v0',
    'gvgai-testgame2-lvl0-v0', 'gvgai-testgame2-lvl0-v0',
    'gvgai-testgame3-lvl0-v0', 'gvgai-testgame3-lvl1-v0'
]
allGames = ['Assault-v0']  # override: train on a single Atari game instead

# pre-build one env instance per worker for every game, so no two worker
# processes ever share a gym environment
envs = {}
for game in allGames:
    envs[game] = []
    for w in range(workers):  # each process needs its own environment
        envs[game].append(gym.make(game))

gameQueue = list(allGames)
random.shuffle(gameQueue)  # play the games in random order

trainer = TpgTrainer(actions=range(6), teamPopSizeInit=50)

summaryScores = []  # record score summaries for each gen (min, max, avg)

for gen in range(100):  # generation loop
    envQueue = []  # hold envs for current gen
    # get right env in envQueue
    game = gameQueue.pop()  # take out last game
    print('playing on', game)
    # hand each worker its dedicated env for this generation's game
    for w in range(workers):
        envQueue.append(envs[game][w])
    # re-get games list once every queued game has been played
    if len(gameQueue) == 0:
        gameQueue = list(allGames)
        random.shuffle(gameQueue)
# --- script setup: single-process variant, one persistent env per game ---

# candidate GVGAI games (lvl0/lvl1 pairs).
# NOTE(review): 'gvgai-testgame2-lvl0-v0' appears twice; the second entry was
# probably meant to be 'gvgai-testgame2-lvl1-v0'. Moot while the override
# below stands, but worth confirming.
allGames = [
    'gvgai-testgame1-lvl0-v0', 'gvgai-testgame1-lvl1-v0',
    'gvgai-testgame2-lvl0-v0', 'gvgai-testgame2-lvl0-v0',
    'gvgai-testgame3-lvl0-v0', 'gvgai-testgame3-lvl1-v0'
]
allGames = ['Assault-v0']  # override: train on a single Atari game instead

# one reusable env per game, created up front
envs = {}
for game in allGames:
    envs[game] = gym.make(game)

gameQueue = list(allGames)
random.shuffle(gameQueue)  # play the games in random order

trainer = TpgTrainer(actions=range(6), teamPopSizeInit=360)

curScores = []  # hold scores in a generation
summaryScores = []  # record score summaries for each gen (min, max, avg)

for gen in range(100):  # generation loop
    curScores = []  # new list per gen
    # get right env in envQueue
    game = gameQueue.pop()  # take out last game
    print('playing on', game)
    env = envs[game]  # reuse the pre-built env for this game
    # re-get games list once every queued game has been played
    if len(gameQueue) == 0:
        gameQueue = list(allGames)
        random.shuffle(gameQueue)
import multiprocessing as mp
import pickle
import time

# worker pool: limit_cpu (defined elsewhere in the file) lowers each worker's
# priority at startup; maxtasksperchild=1 recycles a worker after every task
pool = mp.Pool(processes=15, initializer=limit_cpu, maxtasksperchild=1)
man = mp.Manager()  # manager for shared memory lists

# the eight Atari environments, one TPG population each (see loop below)
allEnvNames = [
    'Alien-v0', 'Asteroids-v0', 'Atlantis-v0', 'BankHeist-v0',
    'BattleZone-v0', 'Bowling-v0', 'Boxing-v0', 'Centipede-v0'
]

# create TPG
# NOTE(review): singlePop=False presumably enables the multiple named
# populations created below — confirm against the TpgTrainer API.
trainer = TpgTrainer(actions=range(18),
                     teamPopSize=200,
                     rTeamPopSize=200,
                     maxProgramSize=128,
                     singlePop=False)

tstart = time.time()  # wall-clock start, for the hoursElapsed log column

# create populations, one per environment
for envName in allEnvNames:
    trainer.createNewPopulation(popName=envName)

logName = 'sgp-log-8-pops.txt'
with open(logName, 'a') as f:
    # CSV header for the per-generation results log
    f.write(
        'tpgGen,hoursElapsed,envName,tpgMin,tpgMax,tpgAvg,eliteSize,eliteUid\n'
    )
def ggpTrainAllAtOnce():
    """Run open-ended general-game-playing TPG training.

    Each iteration ("generation") evaluates all agents on a rotating subset
    of games (fitnessEnvs), evolves the population on a weighted mix of the
    joint task and each individual task, logs results, pickles the trainer,
    and periodically dumps per-agent outcomes and runs a champion evaluation.

    Runs forever (while True) — stop it externally.

    Uses module-level names: options, allEnvNames, man, pool, runAgent,
    tstart, TpgTrainer, champEval, and the various logFile* / trainerFileName
    path variables.
    """
    # create TPG
    trainer = TpgTrainer(actions=range(18),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    envNamesSrt = sorted(list(allEnvNames))  # for reporting envs played
    fitnessEnvs = []

    while True:  # no termination condition: train until externally stopped
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        # choose this generation's fitness games: either a sliding window of
        # options.fitGamesNum randomly-drawn distinct games, or all of them
        if options.fitGamesNum < options.numGames and options.fitGamesNum > 0:
            fitnessEnvs.append(
                random.choice(list(set(allEnvNames) - set(fitnessEnvs))))
            if len(fitnessEnvs) > options.fitGamesNum:
                fitnessEnvs.pop(0)  # drop the oldest game from the window
        else:
            fitnessEnvs = list(allEnvNames)

        for envName in fitnessEnvs:
            print('Playing Game: ' + envName)

            scoreList = man.list()  # shared list for worker-reported scores
            # run all agents on env
            pool.map(runAgent,
                     [(agent, envName, scoreList, options.trainEps,
                       options.trainFrames, None)
                      for agent in trainer.getAllAgents(skipTasks=[envName],
                                                        noRef=True)])
            trainer.applyScores(scoreList)

            # report curEnv results to log
            scoreStats = trainer.getTaskScores(envName)
            bestTeam = trainer.getBestTeams(tasks=[envName])[0]
            with open(logFileGameScoresName, 'a') as f:
                f.write(
                    str(trainer.populations[None].curGen) + ',' +
                    str((time.time() - tstart) / 3600) + ',' + envName + ',' +
                    str(scoreStats['min']) + ',' + str(scoreStats['max']) +
                    ',' + str(scoreStats['average']) + ',' +
                    str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                    str(bestTeam.uid) + '\n')

        # do evolution after all envs played: fitness is a 50/50 blend of the
        # joint task (all fitnessEnvs at once) and the individual games,
        # each single game weighted equally within its half
        trainer.multiEvolve(
            tasks=[fitnessEnvs] + [[en] for en in fitnessEnvs],
            weights=[0.5] + [0.5 / len(fitnessEnvs) for _ in fitnessEnvs],
            fitMethod='min',
            elitistTasks=allEnvNames)

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=fitnessEnvs)[0]
        with open(logFileFitnessName, 'a') as f:
            f.write(
                str(trainer.populations[None].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' +
                '/'.join(sorted(list(fitnessEnvs))) + ',' +
                str(trainer.populations[None].scoreStats['min']) + ',' +
                str(trainer.populations[None].scoreStats['max']) + ',' +
                str(trainer.populations[None].scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + ',' +
                str(len(trainer.populations[None].teams)) + ',' +
                str(len(trainer.populations[None].rootTeams)) + '\n')

        # save model after every gen
        with open(trainerFileName, 'wb') as f:
            pickle.dump(trainer, f)

        # every 10 generations save all agent outcomes, one CSV row per
        # root team with a score (or -999999 placeholder) for every env
        if trainer.populations[None].curGen % 10 == 0:
            for rt in trainer.populations[None].rootTeams:
                with open(logFileOutcomesName, 'a') as f:
                    f.write(
                        str(trainer.populations[None].curGen) + ',' +
                        str((time.time() - tstart) / 3600) + ',' +
                        str(rt.uid))
                    for envName in allEnvNames:
                        f.write(',' + str(rt.outcomes.get(envName, '-999999')))
                    f.write('\n')

        # every options.champEvalGen generations evaluate top agents on all
        # games (the interval is configurable, not hard-coded)
        if trainer.populations[None].curGen % options.champEvalGen == 0:
            champEval(envNamesSrt, trainer, logFileChampionsName, pool, man,
                      tstart,
                      frames=options.testFrames,
                      eps=options.testEps)
    # NOTE(review): the next three statements are the tail of an
    # agent-evaluation function whose definition starts above this chunk;
    # the exact indentation is reconstructed — confirm against the full file.
    env.close()
    # record total score under a task name that encodes env and frame budget
    agent.reward(scoreTotal, envName + '-' + str(numFrames))
    scoreList.append((agent.getUid(), agent.getOutcomes()))


# https://stackoverflow.com/questions/42103367/limit-total-cpu-usage-in-python-multiprocessing/42130713
def limit_cpu():
    """Deprioritize the current worker process (Unix nice value 10) so the
    pool does not starve the rest of the machine."""
    p = psutil.Process(os.getpid())
    p.nice(10)


envName = 'Boxing-v0'

# fresh run: build a trainer sized to the env's action space;
# otherwise resume from the pickled model of a previous run
if options.curGen == 0:
    tmpEnv = gym.make(envName)
    trainer = TpgTrainer(actions=range(tmpEnv.action_space.n),
                         teamPopSize=360)
    tmpEnv.close()
else:
    with open('saved-model-sgp.pkl', 'rb') as f:
        trainer = pickle.load(f)

processes = 2
# each worker is deprioritized at startup and recycled after 5 tasks
pool = mp.Pool(processes=processes, initializer=limit_cpu, maxtasksperchild=5)
man = mp.Manager()

allScores = []  # track all scores each generation

tStart = time.time()

curGen = options.curGen  # generation of tpg
    # NOTE(review): the next two statements are the tail of an
    # agent-evaluation function whose definition starts above this chunk;
    # the exact indentation is reconstructed — confirm against the full file.
    agent.reward(score)
    scoreList.append((agent.getUid(), agent.getOutcomes()))


# https://stackoverflow.com/questions/42103367/limit-total-cpu-usage-in-python-multiprocessing/42130713
def limit_cpu():
    """Deprioritize the current worker process (Unix nice value 10) so the
    pool does not starve the rest of the machine."""
    p = psutil.Process(os.getpid())
    p.nice(10)


# resume from a saved model, or start a fresh trainer.
# NOTE(review): actionRange=(-1.0, 1.0, 0.35) presumably specifies
# (low, high, step) for real-valued actions — confirm against TpgTrainer docs.
if options.cont:
    with open('saved-model-1.pkl', 'rb') as f:
        trainer = pickle.load(f)
else:
    trainer = TpgTrainer(actions=19,
                         actionRange=(-1.0, 1.0, 0.35),
                         teamPopSize=360,
                         maxProgramSize=256)

processes = 7
pool = mp.Pool(processes=processes, maxtasksperchild=2)
man = mp.Manager()

allScores = []  # track all scores each generation

tStart = time.time()

# timestamped log file name, e.g. train-log-2020-01-31-12-59.txt
logFileName = 'train-log-' + datetime.datetime.now().strftime(
    "%Y-%m-%d-%H-%M") + '.txt'

while True:  # do generations with no end
    scoreList = man.list()  # shared list for worker-reported scores