import time
import multiprocessing as mp
from multiprocessing import set_start_method

import gym
import numpy as np
from vizdoom import DoomGame, ScreenResolution

from tpg.trainer import Trainer, loadTrainer
# getTeams, getLearners, learnerInstructionStats, actionInstructionStats and
# pathDepths are helper utilities from the PyTPG examples; adjust the import
# path to match your PyTPG version.
from tpg.utils import (getTeams, getLearners, learnerInstructionStats,
                       actionInstructionStats, pathDepths)


def runPopulation(envName="Boxing-v0", gens=1000, popSize=360, reps=3,
                  frames=18000, nRandFrames=30):
    # get the number of actions in the environment
    env = gym.make(envName)
    acts = env.action_space.n

    trainer = Trainer(actions=range(acts), teamPopSize=popSize)

    tStart = time.time()
    allScores = []  # track (min, max, avg) scores per generation

    for gen in range(gens):  # do generations of training
        agents = trainer.getAgents()
        while len(agents) > 0:  # loop through agents of the current generation
            agent = agents.pop()
            if agent.taskDone(envName):
                continue

            score = 0
            for i in range(reps):  # repetitions of the game
                state = env.reset()
                for j in range(frames):  # frames of the game
                    # take random actions at the start for stochasticity
                    if j < nRandFrames:
                        state, reward, isDone, debug = env.step(
                            env.action_space.sample())
                        continue

                    act = agent.act(getStateALE(np.array(state, dtype=np.int32)))
                    state, reward, isDone, debug = env.step(act)
                    score += reward  # accumulate reward in score
                    if isDone:
                        break  # end the episode early in a losing state

            agent.reward(score / reps, envName)
            print('Agent #' + str(agent.agentNum) +
                  ' | Score: ' + str(score / reps))

        # current generation done
        trainer.evolve(tasks=[envName])

        # track stats
        scoreStats = trainer.fitnessStats
        allScores.append(
            (scoreStats['min'], scoreStats['max'], scoreStats['average']))

        print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
        print('Gen: ' + str(gen))
        print('Results so far: ' + str(allScores))

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    return trainer, allScores[-1]
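# runPopulation above (and the episode loop further down) calls a screen
# preprocessing helper that is not defined in this file. The sketch below is
# an assumption modeled on the getState helper from the PyTPG tutorial, which
# packs each RGB pixel into one integer via bit shifts (hence the int32 cast
# before calling it); substitute your own getStateALE if it differs.
def getStateALE(inState):
    # each row of rgbRows holds one color channel for every pixel
    rgbRows = np.reshape(inState, (len(inState[0]) * len(inState), 3)).T
    # pack each pixel as RRRRRRRR GGGGGGGG BBBBBBBB in a single int
    return np.add(np.left_shift(rgbRows[0], 16),
                  np.add(np.left_shift(rgbRows[1], 8), rgbRows[2]))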
def train():
    tStart = time.time()

    # build the environment once just to verify the config loads
    envName = 'deadly_corridor.cfg'
    game = DoomGame()
    game.load_config(envName)
    game.set_sound_enabled(False)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()
    # acts = game.get_available_buttons_size()
    del game

    trainer = Trainer(actions=range(4), teamPopSize=30, rTeamPopSize=30)
    # trainer = loadTrainer('trainer.tn')  # or resume from a saved trainer

    processes = 7
    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)

    allScores = []  # track all scores each generation

    for gen in range(100):  # do 100 generations of training
        scoreList = man.list()

        # swap out agents only at the start of a generation; agents hold no
        # reference to the trainer, so they can be shipped to worker processes
        agents = trainer.getAgents()

        # run the agents
        pool.map(runAgent,
                 [(agent, envName, scoreList, 1, 2000) for agent in agents])

        # apply scores back to the trainer; this step is required when
        # multiprocessing because the agents can't refer to the trainer
        teams = trainer.applyScores(scoreList)

        # remember to set tasks correctly (unless not using task names);
        # the task name is set in runAgent()
        trainer.evolve(tasks=[envName])  # go into the next gen

        # track stats
        scoreStats = trainer.fitnessStats
        allScores.append(
            (scoreStats['min'], scoreStats['max'], scoreStats['average']))

        print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
        print('Gen: ' + str(gen))
        print('Results so far: ' + str(allScores))

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    trainer.saveToFile('trainer.tn')
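# runAgent is referenced above but not defined in this file. The sketch below
# is an assumption built from the tuple passed in pool.map:
# (agent, config file, shared score list, episodes, frames per episode).
# Each worker builds its own DoomGame, since game instances can't be shared
# across processes; the action encoding assumes the 4 buttons declared by
# Trainer(actions=range(4)) above, and the screen resolution is illustrative.
def runAgent(args):
    agent, envName, scoreList, numEpisodes, numFrames = args

    game = DoomGame()
    game.load_config(envName)
    game.set_sound_enabled(False)
    game.set_screen_resolution(ScreenResolution.RES_160X120)
    game.set_window_visible(False)
    game.init()

    scoreTotal = 0
    for ep in range(numEpisodes):
        game.new_episode()
        for frame in range(numFrames):
            if game.is_episode_finished():
                break
            state = game.get_state()
            act = agent.act(state.screen_buffer.flatten())
            action = [0] * 4  # one-hot the chosen button
            action[act] = 1
            scoreTotal += game.make_action(action)
    game.close()

    # the task name given here must match trainer.evolve(tasks=[envName])
    agent.reward(scoreTotal / numEpisodes, envName)
    scoreList.append((agent.team.id, agent.team.outcomes))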
def runPopulationParallel(envName="Boxing-v0", gens=1000, popSize=360, reps=3,
                          frames=18000, processes=4, nRandFrames=30):
    tStart = time.time()

    # get the number of actions in the environment
    env = gym.make(envName)
    acts = env.action_space.n
    del env

    trainer = Trainer(actions=range(acts), teamPopSize=popSize)

    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)

    allScores = []  # track all scores each generation

    for gen in range(gens):  # do generations of training
        scoreList = man.list()

        # swap out agents only at the start of a generation
        agents = trainer.getAgents()

        # run the agents
        pool.map(runAgentParallel,
                 [(agent, envName, scoreList, reps, frames, nRandFrames)
                  for agent in agents])

        # prepare the population for the next gen
        teams = trainer.applyScores(scoreList)
        trainer.evolve(tasks=[envName])  # go into the next gen

        # track stats
        scoreStats = trainer.fitnessStats
        allScores.append(
            (scoreStats['min'], scoreStats['max'], scoreStats['average']))

        print(f"Gen: {gen}, Best Score: {scoreStats['max']}, "
              f"Time: {str((time.time() - tStart) / 3600)}")

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    return trainer, allScores[-1]
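# runAgentParallel is referenced above but not defined in this file. A minimal
# sketch under the same assumptions as runPopulation: the argument tuple is
# (agent, env name, shared score list, reps, frames, random start frames), and
# each worker creates its own gym environment because envs don't pickle. Note
# that the extended runner further down passes an extra do_real flag, so its
# version of this worker would unpack one more argument.
def runAgentParallel(args):
    agent, envName, scoreList, reps, frames, nRandFrames = args

    env = gym.make(envName)

    score = 0
    for rep in range(reps):  # repetitions of the game
        state = env.reset()
        for j in range(frames):  # frames of the game
            # take random actions at the start for stochasticity
            if j < nRandFrames:
                state, reward, isDone, debug = env.step(
                    env.action_space.sample())
                continue

            act = agent.act(getStateALE(np.array(state, dtype=np.int32)))
            state, reward, isDone, debug = env.step(act)
            score += reward
            if isDone:
                break
    env.close()

    agent.reward(score / reps, envName)
    scoreList.append((agent.team.id, agent.team.outcomes))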
# (excerpt from the tutorial's generation loop: the body below evaluates one
# agent per iteration of the enclosing agent loop)
        for i in range(500):  # run an episode that lasts up to 500 frames
            show_state(env, i, 'Assault',
                       'Gen #' + str(gen) + ', Team #' + str(teamNum) +
                       ', Score: ' + str(score))  # render env

            # get action from agent; the state must be at least int32 for
            # getState to bit-shift correctly
            act = agent.act(getState(np.array(state, dtype=np.int32)))

            # feedback from env
            state, reward, isDone, debug = env.step(act)
            score += reward  # accumulate reward in score
            if isDone:
                break  # end the episode early in a losing state

        agent.reward(score)  # must reward the agent (if it didn't already score)
        curScores.append(score)  # store score

        if len(agents) == 0:
            break

    # at the end of a generation, summarize the scores
    summaryScores.append((min(curScores), max(curScores),
                          sum(curScores) / len(curScores)))  # min, max, avg
    trainer.evolve()
    # clear_output(wait=True)

print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
print('Results:\nMin, Max, Avg')
for result in summaryScores:
    print(result[0], result[1], result[2])
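# show_state is the notebook rendering helper used above but not defined in
# this file. A sketch of the version from the PyTPG tutorial, assuming you are
# running inside Jupyter and an older gym where render() takes a mode argument;
# outside a notebook, replace the call with a plain env.render().
import matplotlib.pyplot as plt
from IPython import display

def show_state(env, step=0, name='', info=''):
    plt.figure(3)
    plt.clf()
    plt.imshow(env.render(mode='rgb_array'))
    plt.title('%s | Step: %d %s' % (name, step, info))
    plt.axis('off')
    display.clear_output(wait=True)
    display.display(plt.gcf())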
# Extended variant of runPopulationParallel with configurable TPG options
# (memory, operation set, rampancy, traversal, and real-valued actions).
def runPopulationParallel(envName="Boxing-v0", gens=1000, popSize=360, reps=3,
                          frames=18000, processes=4, nRandFrames=30,
                          rootBasedPop=True, memType=None, operationSet="full",
                          rampancy=(5, 5, 5), traversal="team", do_real=False):
    tStart = time.time()

    '''
    Python really is something special... sometimes it just deadlocks... ¯\\_(ツ)_/¯
    https://pythonspeed.com/articles/python-multiprocessing/
    '''
    set_start_method("spawn")

    print("creating atari environment")
    # get the number of actions in the environment
    env = gym.make(envName)
    acts = env.action_space.n
    del env

    print("creating trainer")
    if do_real:
        trainer = Trainer(actions=[1, 1], teamPopSize=popSize,
                          rootBasedPop=rootBasedPop, memType=memType,
                          operationSet=operationSet, rampancy=rampancy,
                          traversal=traversal)
    else:
        trainer = Trainer(actions=acts, teamPopSize=popSize,
                          rootBasedPop=rootBasedPop, memType=memType,
                          operationSet=operationSet, rampancy=rampancy,
                          traversal=traversal)
    trainer.configFunctions()

    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)

    allScores = []  # track all scores each generation

    print("running generations")
    for gen in range(gens):  # do generations of training
        print("doing generation {}".format(gen))
        scoreList = man.list()

        # swap out agents only at the start of a generation
        agents = trainer.getAgents()

        try:
            # run the agents
            pool.map(runAgentParallel,
                     [(agent, envName, scoreList, reps, frames, nRandFrames,
                       do_real) for agent in agents])
        except Exception as mpException:
            print("Exception occurred while running multiprocessing via pool.map!")
            print(mpException)
            raise

        # prepare the population for the next gen
        print("Applying gen {} scores to agents".format(gen))
        teams = trainer.applyScores(scoreList)

        print("Getting champion")
        champ = trainer.getAgents(sortTasks=[envName])[0].team

        print("Evolving population")
        trainer.evolve(tasks=[envName])  # go into the next gen

        # track stats
        scoreStats = trainer.fitnessStats
        allScores.append(
            (scoreStats['min'], scoreStats['max'], scoreStats['average']))

        print("teams: {}, rTeams: {}, learners: {}, Champ Teams: {}, "
              "Champ Learners: {}, Champ Instructions: {}.".format(
                  len(trainer.teams), len(trainer.rootTeams),
                  len(trainer.learners), len(getTeams(champ)),
                  len(getLearners(champ)),
                  learnerInstructionStats(getLearners(champ),
                                          trainer.operations)))
        print(f"Gen: {gen}, Best Score: {scoreStats['max']}, "
              f"Avg Score: {scoreStats['average']}, "
              f"Time: {str((time.time() - tStart) / 3600)}")
        print(pathDepths(champ))

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    return trainer, allScores[-1]
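# Example entry point. The parameter values below are illustrative; with the
# "spawn" start method set above, multiprocessing requires the __main__ guard.
if __name__ == '__main__':
    trainer, finalStats = runPopulationParallel(
        envName='Boxing-v0', gens=200, popSize=360,
        reps=3, frames=18000, processes=4)
    print('Final (min, max, avg):', finalStats)
    trainer.saveToFile('trainer.tn')  # persist the evolved population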