def runPopulation(envName="Boxing-v0", gens=1000, popSize=360, reps=3,
                  frames=18000, nRandFrames=30):
    """Train a TPG population on a gym environment, one agent at a time.

    Every agent of every generation plays ``reps`` episodes of at most
    ``frames`` frames. The first ``nRandFrames`` frames of an episode use
    random actions (for stochasticity) and their rewards are not counted.
    The mean score over the repetitions is reported to the agent under the
    task name ``envName``, then the trainer evolves the population.

    Args:
        envName: gym environment id; also used as the TPG task name.
        gens: number of generations to train.
        popSize: team population size handed to the Trainer.
        reps: episodes played per agent per generation.
        frames: maximum frames per episode.
        nRandFrames: number of leading frames played with random actions.

    Returns:
        ``(trainer, lastStats)`` where ``lastStats`` is the
        ``(min, max, average)`` fitness tuple of the final generation, or
        ``None`` when ``gens`` is 0 and no generation was run.
    """
    env = gym.make(envName)
    try:
        # the size of the discrete action space defines the trainer's actions
        acts = env.action_space.n
        trainer = Trainer(actions=range(acts), teamPopSize=popSize)

        tStart = time.time()
        allScores = []  # (min, max, average) fitness per generation

        for gen in range(gens):
            agents = trainer.getAgents()

            while agents:
                agent = agents.pop()
                if agent.taskDone(envName):
                    continue  # this agent already has a score for the task

                score = 0
                for _ in range(reps):
                    state = env.reset()
                    for frame in range(frames):
                        if frame < nRandFrames:
                            # random warm-up: reward is deliberately ignored
                            state, reward, isDone, debug = env.step(
                                env.action_space.sample())
                            if isDone:
                                # never keep stepping a finished episode
                                break
                            continue

                        act = agent.act(
                            getStateALE(np.array(state, dtype=np.int32)))
                        state, reward, isDone, debug = env.step(act)
                        score += reward
                        if isDone:
                            break

                agent.reward(score/reps, envName)
                print('Agent #' + str(agent.agentNum) +
                      ' | Score: ' + str(score/reps))

            # current generation done
            trainer.evolve(tasks=[envName])

            scoreStats = trainer.fitnessStats
            allScores.append((scoreStats['min'], scoreStats['max'],
                              scoreStats['average']))

            print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
            print('Gen: ' + str(gen))
            print('Results so far: ' + str(allScores))
    finally:
        # release the emulator even if training is interrupted
        env.close()

    print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
    print('Results:\nMin, Max, Avg')
    for genStats in allScores:
        print(genStats[0], genStats[1], genStats[2])

    # gens == 0 leaves allScores empty; avoid an IndexError in that case
    return trainer, (allScores[-1] if allScores else None)
def train():
    """Train a TPG population on the ViZDoom 'deadly_corridor.cfg' scenario.

    Runs 100 generations. In each generation every agent is evaluated by
    ``runAgent`` in a pool of 7 worker processes; the workers append their
    results to a manager-backed list that is then applied to the trainer
    before it evolves. The trained population is written to 'trainer.tn'.
    The action count is hard-coded to 4.
    """
    tStart = time.time()

    envName = 'deadly_corridor.cfg'

    # Initialise the game once in the parent process with the same settings;
    # nothing is read back from it.
    # NOTE(review): presumably a smoke test that the config loads - confirm.
    game = DoomGame()
    game.load_config(envName)
    game.set_sound_enabled(False)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()
    game.close()  # shut the engine down explicitly instead of relying on del
    del game

    # To resume a saved run, use loadTrainer('trainer.tn') here instead.
    trainer = Trainer(actions=range(4), teamPopSize=30, rTeamPopSize=30)

    processes = 7
    allScores = []  # (min, max, average) fitness per generation

    # Context managers make sure the manager and worker processes are shut
    # down even when a generation fails.
    with mp.Manager() as man, \
            mp.Pool(processes=processes, maxtasksperchild=1) as pool:
        for gen in range(100):
            scoreList = man.list()  # shared list the workers write into

            # swap out agents only at the start of a generation
            agents = trainer.getAgents()

            pool.map(runAgent,
                     [(agent, envName, scoreList, 1, 2000)
                      for agent in agents])

            # Scores must be applied here: the agents ran in other
            # processes and cannot update the trainer themselves.
            trainer.applyScores(scoreList)

            # the task name must match the one used inside runAgent()
            trainer.evolve(tasks=[envName])

            scoreStats = trainer.fitnessStats
            allScores.append((scoreStats['min'], scoreStats['max'],
                              scoreStats['average']))

            print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
            print('Gen: ' + str(gen))
            print('Results so far: ' + str(allScores))

    print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
    print('Results:\nMin, Max, Avg')
    for genStats in allScores:
        print(genStats[0], genStats[1], genStats[2])

    trainer.saveToFile('trainer.tn')
def runPopulationParallel(envName="Boxing-v0", gens=1000, popSize=360, reps=3,
                          frames=18000, processes=4, nRandFrames=30):
    """Train a TPG population on a gym environment using a process pool.

    Each generation, every agent is evaluated by ``runAgentParallel`` in a
    worker process; results are collected in a manager-backed list, applied
    to the trainer, and the population is evolved.

    Args:
        envName: gym environment id; also used as the TPG task name.
        gens: number of generations to train.
        popSize: team population size handed to the Trainer.
        reps: episodes per agent, forwarded to ``runAgentParallel``.
        frames: maximum frames per episode, forwarded to the worker.
        processes: number of worker processes in the pool.
        nRandFrames: random warm-up frames, forwarded to the worker.

    Returns:
        ``(trainer, lastStats)`` where ``lastStats`` is the
        ``(min, max, average)`` fitness tuple of the final generation, or
        ``None`` when ``gens`` is 0 and no generation was run.
    """
    tStart = time.time()

    # The environment is only needed here to learn the number of actions.
    env = gym.make(envName)
    acts = env.action_space.n
    env.close()
    del env

    trainer = Trainer(actions=range(acts), teamPopSize=popSize)

    allScores = []  # (min, max, average) fitness per generation

    # Context managers make sure the manager and worker processes are shut
    # down even when a generation fails.
    with mp.Manager() as man, \
            mp.Pool(processes=processes, maxtasksperchild=1) as pool:
        for gen in range(gens):
            scoreList = man.list()  # shared list the workers write into

            # swap out agents only at the start of a generation
            agents = trainer.getAgents()

            pool.map(runAgentParallel,
                     [(agent, envName, scoreList, reps, frames, nRandFrames)
                      for agent in agents])

            # prepare population for next gen
            trainer.applyScores(scoreList)
            trainer.evolve(tasks=[envName])

            scoreStats = trainer.fitnessStats
            allScores.append((scoreStats['min'], scoreStats['max'],
                              scoreStats['average']))

            elapsedHours = (time.time() - tStart)/3600
            print(
                f"Gen: {gen}, Best Score: {scoreStats['max']}, Time: {elapsedHours}"
            )

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for genStats in allScores:
        print(genStats[0], genStats[1], genStats[2])

    # gens == 0 leaves allScores empty; avoid an IndexError in that case
    return trainer, (allScores[-1] if allScores else None)
def test_pickle(self):
    """A trainer saved to disk and loaded back keeps its teams and learners."""
    import os

    savePath = "test_trainer_save"

    def _removeSaveFile():
        # Best effort: the file does not exist if saving itself failed.
        try:
            os.remove(savePath)
        except FileNotFoundError:
            pass

    # Registered before saving so the artifact is removed even when an
    # assertion below fails.
    self.addCleanup(_removeSaveFile)

    trainer = Trainer(actions=self.dummy_actions)
    trainer.saveToFile(savePath)
    loaded_trainer = loadTrainer(savePath)

    # Ensure loaded trainer has all the same teams and learners
    self.assertEqual(len(trainer.teams), len(loaded_trainer.teams))
    for team in trainer.teams:
        self.assertIn(team, loaded_trainer.teams)

    self.assertEqual(len(trainer.learners), len(loaded_trainer.learners))
    for learner in trainer.learners:
        self.assertIn(learner, loaded_trainer.learners)
gameReps = opts.gameReps #Set up Magnus to destroy the webway print("Comment 1") if path.exists("Tzeentch") and path.exists("Magnus"): trainer = Trainer trainer = trainer.loadTrainer("Tzeentch") agents = trainer.getAgents() agent = Agent agent = agent.loadAgent("Magnus") print("yo as far as I understand the load was successful?") else: IQ = 0 trainer = Trainer(actions=range(30), teamPopSize=opts.popSize, rTeamPopSize=opts.popSize, sourceRange=310) agents = trainer.getAgents() agent = agents.pop() agent.saveToFile("Magnus") #psykerLevel = 0 trainer.saveToFile("Tzeentch") agentScores = [] curGen = 0 #psykerLevel += agent.psykerLevel lastState = None # create a log file global logName
# import to do training from tpg.trainer import Trainer # import to run an agent (always needed) from tpg.agent import Agent # The algorithms require a vectorized environment to run env = DummyVecEnv([lambda: GoNoGo()]) import time # for tracking time tStart = time.time() # first create an instance of the TpgTrainer # this creates the whole population and everything # teamPopSize should realistically be at-least 100 trainer = Trainer(actions=range(2), teamPopSize=20, rTeamPopSize=20) curScores = [] # hold scores in a generation summaryScores = [] # record score summaries for each gen (min, max, avg) # 5 generations isn't much (not even close), but some improvements # should be seen. for gen in range(5): # generation loop curScores = [] # new list per gen agents = trainer.getAgents() while True: # loop to go through agents teamNum = len(agents) agent = agents.pop() if agent is None:
return np.ndarray.flatten(state) # import to do training from tpg.trainer import Trainer # import to run an agent (always needed) from tpg.agent import Agent # Source: as adapted from https://github.com/Ryan-Amaral/PyTPG/blob/master/tpg_examples.ipynb import time # for tracking time tStart = time.time() # first create an instance of the TpgTrainer # this creates the whole population and everything # teamPopSize should realistically be at-least 100 trainer = Trainer(actions=range(env.action_space.n), teamPopSize=100) curScores = [] # hold scores in a generation summaryScores = [] # record score summaries for each gen (min, max, avg) # 5 generations isn't much (not even close), but some improvements # should be seen. for gen in range(20): # generation loop curScores = [] # new list per gen agents = trainer.getAgents() while True: # loop to go through agents teamNum = len(agents) agent = agents.pop() if agent is None:
def test_init(self):
    """Trainer accepts valid constructor arguments and rejects invalid ones.

    Each fixture on ``self`` is a sequence of cases indexed as
    ``case[0]`` (the value to pass) and ``case[1]`` (truthy when the value
    is valid). Valid values must be stored unchanged on the trainer
    attribute of the same name; invalid values must make the constructor
    raise. Finally, a default trainer must hold ``teamPopSize`` teams and
    at least one learner.
    """
    # (Trainer keyword == attribute name, fixture attribute on self)
    paramFixtures = (
        ("teamPopSize", "teamPopSize"),
        ("rootBasedPop", "rootBasedPop"),
        ("gap", "gap"),
        ("inputSize", "inputSize"),
        ("nRegisters", "nRegisters"),
        ("initMaxTeamSize", "initMaxTeamSize"),
        ("initMaxProgSize", "initMaxProgSize"),
        ("doElites", "doElites"),
        ("rampancy", "rampancy"),
        ("operationSet", "operationSet"),
        ("traversal", "traversal"),
        # every probability parameter shares the same pool of cases
        ("pLrnDel", "probability_pool"),
        ("pLrnAdd", "probability_pool"),
        ("pLrnMut", "probability_pool"),
        ("pProgMut", "probability_pool"),
        ("pActMut", "probability_pool"),
        ("pActAtom", "probability_pool"),
        ("pInstDel", "probability_pool"),
        ("pInstAdd", "probability_pool"),
        ("pInstSwp", "probability_pool"),
        ("pInstMut", "probability_pool"),
    )

    for paramName, fixtureName in paramFixtures:
        for case in getattr(self, fixtureName):
            value, isValid = case[0], case[1]
            # subTest labels a failure with the offending parameter/value
            with self.subTest(param=paramName, value=value):
                if isValid:
                    trainer = Trainer(actions=self.dummy_actions,
                                      **{paramName: value})
                    self.assertEqual(value, getattr(trainer, paramName))
                else:
                    # assertRaises already fails when nothing is raised, so
                    # no further check on the captured exception is needed
                    with self.assertRaises(Exception):
                        Trainer(actions=self.dummy_actions,
                                **{paramName: value})

    trainer = Trainer(actions=self.dummy_actions)

    # Ensure the right initial team pop size was created
    self.assertEqual(trainer.teamPopSize, len(trainer.teams))

    # Ensure there are learners
    self.assertGreater(len(trainer.learners), 0)
global trainer global agents global agent global agentScores global curGen global lastState breezyIp = opts.breezyIp breezyPort = opts.breezyPort totalGens = opts.gens gameReps = opts.gameReps # set up of the TPG agent trainer = Trainer(actions=range(30), teamPopSize=opts.popSize, rTeamPopSize=opts.popSize, sourceRange=310) agents = trainer.getAgents() agent = agents.pop() agentScores = [] curGen = 0 lastState = None # create a log file global logName timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") logName = "log-{}.txt".format(timestamp) # serve until force stop while True: pass
def runPopulationParallel(envName="Boxing-v0", gens=1000, popSize=360, reps=3,
                          frames=18000, processes=4, nRandFrames=30,
                          rootBasedPop=True, memType=None, operationSet="full",
                          rampancy=(5, 5, 5), traversal="team", do_real=False):
    """Train a TPG population on a gym environment using a process pool.

    Each generation, every agent is evaluated by ``runAgentParallel`` in a
    worker process; results are collected in a manager-backed list and
    applied to the trainer. The champion team for ``envName`` is picked
    before the population is evolved, and population/champion statistics
    are printed.

    Args:
        envName: gym environment id; also used as the TPG task name.
        gens: number of generations to train.
        popSize: team population size handed to the Trainer.
        reps: episodes per agent, forwarded to ``runAgentParallel``.
        frames: maximum frames per episode, forwarded to the worker.
        processes: number of worker processes in the pool.
        nRandFrames: random warm-up frames, forwarded to the worker.
        rootBasedPop, memType, operationSet, rampancy, traversal:
            forwarded unchanged to the Trainer constructor.
        do_real: when true the trainer is built with ``actions=[1, 1]``
            instead of the environment's action count; the flag is also
            forwarded to the worker.

    Returns:
        ``(trainer, lastStats)`` where ``lastStats`` is the
        ``(min, max, average)`` fitness tuple of the final generation, or
        ``None`` when ``gens`` is 0 and no generation was run.
    """
    tStart = time.time()

    # The default start method can deadlock; use "spawn" instead, see
    # https://pythonspeed.com/articles/python-multiprocessing/
    # force=True keeps a second call of this function from raising
    # RuntimeError because the start method was already set.
    set_start_method("spawn", force=True)

    print("creating atari environment")
    # The environment is only needed here to learn the number of actions.
    env = gym.make(envName)
    acts = env.action_space.n
    env.close()
    del env

    print("creating trainer")
    actions = [1, 1] if do_real else acts
    trainer = Trainer(actions=actions,
                      teamPopSize=popSize,
                      rootBasedPop=rootBasedPop,
                      memType=memType,
                      operationSet=operationSet,
                      rampancy=rampancy,
                      traversal=traversal)

    trainer.configFunctions()

    allScores = []  # (min, max, average) fitness per generation

    print("running generations")
    # Context managers make sure the manager and worker processes are shut
    # down even when a generation fails.
    with mp.Manager() as man, \
            mp.Pool(processes=processes, maxtasksperchild=1) as pool:
        for gen in range(gens):
            print("doing generation {}".format(gen))
            scoreList = man.list()  # shared list the workers write into

            # swap out agents only at the start of a generation
            agents = trainer.getAgents()

            try:
                pool.map(runAgentParallel, [
                    (agent, envName, scoreList, reps, frames, nRandFrames,
                     do_real) for agent in agents
                ])
            except Exception as mpException:
                print(
                    "Exception occured while running multiprocessing via pool.map!"
                )
                print(mpException)
                raise  # bare raise keeps the original traceback intact

            # prepare population for next gen
            print("Applying gen {} scores to agents".format(gen))
            trainer.applyScores(scoreList)

            # the champion is taken before evolve() changes the population
            print("Getting champion")
            champ = trainer.getAgents(sortTasks=[envName])[0].team

            print("Evolving population")
            trainer.evolve(tasks=[envName])

            scoreStats = trainer.fitnessStats
            allScores.append((scoreStats['min'], scoreStats['max'],
                              scoreStats['average']))

            champLearners = getLearners(champ)
            print(
                "teams: {}, rTeams: {}, learners: {}, Champ Teams: {}, Champ Learners: {}, Champ Instructions: {}."
                .format(
                    len(trainer.teams), len(trainer.rootTeams),
                    len(trainer.learners), len(getTeams(champ)),
                    len(champLearners),
                    learnerInstructionStats(champLearners,
                                            trainer.operations)))

            elapsedHours = (time.time() - tStart)/3600
            print(
                f"Gen: {gen}, Best Score: {scoreStats['max']}, Avg Score: {scoreStats['average']}, Time: {elapsedHours}"
            )

            print(pathDepths(champ))

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for genStats in allScores:
        print(genStats[0], genStats[1], genStats[2])

    # gens == 0 leaves allScores empty; avoid an IndexError in that case
    return trainer, (allScores[-1] if allScores else None)