예제 #1
0
def runPopulation(envName="Boxing-v0", gens=1000, popSize=360, reps=3,
        frames=18000, nRandFrames=30):
    """Train a TPG population on a gym environment, one agent at a time.

    Args:
        envName: Id passed to ``gym.make``; also used as the task name when
            rewarding agents and when evolving.
        gens: Number of generations to run.
        popSize: ``teamPopSize`` handed to the ``Trainer``.
        reps: Episodes played per agent; the agent is rewarded with the mean
            score over these episodes.
        frames: Maximum number of frames per episode.
        nRandFrames: Number of leading frames of every episode that are played
            with random actions. These frames do not contribute to the score.

    Returns:
        ``(trainer, stats)`` where ``stats`` is the ``(min, max, average)``
        tuple of the final generation, or ``None`` if no generation was run.
    """
    env = gym.make(envName)
    try:
        # get num actions
        acts = env.action_space.n

        trainer = Trainer(actions=range(acts), teamPopSize=popSize)

        tStart = time.time()

        allScores = []  # (min, max, average) per generation
        for gen in range(gens):  # do generations of training
            agents = trainer.getAgents()

            while agents:  # loop through agents of current generation
                agent = agents.pop()
                if agent.taskDone(envName):
                    continue

                score = 0
                for _ in range(reps):  # repetitions of game
                    state = env.reset()
                    for frame in range(frames):  # frames of game
                        if frame < nRandFrames:
                            # start random for stochasticity
                            state, reward, isDone, _ = env.step(
                                env.action_space.sample())
                        else:
                            act = agent.act(
                                getStateALE(np.array(state, dtype=np.int32)))
                            state, reward, isDone, _ = env.step(act)
                            score += reward  # accumulate reward in score

                        # Checked for random frames too, so a finished
                        # episode is never stepped again before reset().
                        if isDone:
                            break

                agent.reward(score/reps, envName)

                print('Agent #' + str(agent.agentNum) +
                    ' | Score: ' + str(score/reps))

            # current generation done
            trainer.evolve(tasks=[envName])

            # track stats
            scoreStats = trainer.fitnessStats
            allScores.append(
                (scoreStats['min'], scoreStats['max'], scoreStats['average']))

            print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
            print('Gen: ' + str(gen))
            print('Results so far: ' + str(allScores))
    finally:
        # Release the environment even when training is interrupted.
        env.close()

    print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0],score[1],score[2])

    # With gens == 0 there are no stats to report.
    return trainer, (allScores[-1] if allScores else None)
예제 #2
0
def train():
    """Train a TPG population on the 'deadly_corridor.cfg' Doom scenario.

    Runs 100 generations. Each generation's agents are evaluated by
    ``runAgent`` in a pool of worker processes that report their scores
    through a manager list. Fitness statistics are printed per generation and
    the trainer is saved to 'trainer.tn' at the end.
    """
    tStart = time.time()
    envName = 'deadly_corridor.cfg'

    # Configure and start the game once in this process; the instance is not
    # used afterwards and is dropped immediately.
    game = DoomGame()
    game.load_config(envName)
    game.set_sound_enabled(False)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()
    del game

    trainer = Trainer(actions=range(4), teamPopSize=30, rTeamPopSize=30)
    # To continue from a saved run instead:
    # trainer = loadTrainer('trainer.tn')
    processes = 7

    allScores = [] # track all scores each generation

    # The context managers shut the manager and the worker pool down even if
    # a generation fails, so no helper processes are left behind.
    with mp.Manager() as man, \
            mp.Pool(processes=processes, maxtasksperchild=1) as pool:
        for gen in range(100): # do 100 generations of training
            scoreList = man.list()

            # swap out agents only at start of generation
            agents = trainer.getAgents()

            # run the agents
            pool.map(runAgent,
                [(agent, envName, scoreList, 1, 2000)
                for agent in agents])

            # apply scores, must do this when multiprocessing
            # because agents can't refer to trainer
            trainer.applyScores(scoreList)
            # important to remember to set tasks right, unless not using
            # task names; task name set in runAgent()
            trainer.evolve(tasks=[envName]) # go into next gen

            scoreStats = trainer.fitnessStats
            allScores.append(
                (scoreStats['min'], scoreStats['max'], scoreStats['average']))
            print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
            print('Gen: ' + str(gen))
            print('Results so far: ' + str(allScores))

    print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0],score[1],score[2])

    trainer.saveToFile('trainer.tn')
예제 #3
0
def runPopulationParallel(envName="Boxing-v0",
                          gens=1000,
                          popSize=360,
                          reps=3,
                          frames=18000,
                          processes=4,
                          nRandFrames=30):
    """Train a TPG population on a gym environment using worker processes.

    Args:
        envName: Id passed to ``gym.make``; also used as the task name when
            evolving.
        gens: Number of generations to run.
        popSize: ``teamPopSize`` handed to the ``Trainer``.
        reps: Forwarded to ``runAgentParallel`` for every agent.
        frames: Forwarded to ``runAgentParallel`` for every agent.
        processes: Number of worker processes in the pool.
        nRandFrames: Forwarded to ``runAgentParallel`` for every agent.

    Returns:
        ``(trainer, stats)`` where ``stats`` is the ``(min, max, average)``
        tuple of the final generation, or ``None`` if no generation was run.
    """
    tStart = time.time()

    # The environment is only needed here to read the number of actions.
    env = gym.make(envName)
    try:
        acts = env.action_space.n
    finally:
        env.close()
    del env

    trainer = Trainer(actions=range(acts), teamPopSize=popSize)

    allScores = []  # track all scores each generation

    # The context managers shut the manager and the worker pool down even if
    # a generation fails, so no helper processes are left behind.
    with mp.Manager() as man, \
            mp.Pool(processes=processes, maxtasksperchild=1) as pool:
        for gen in range(gens):  # do generations of training
            scoreList = man.list()

            # swap out agents only at start of generation
            agents = trainer.getAgents()

            # run the agents
            pool.map(runAgentParallel,
                     [(agent, envName, scoreList, reps, frames, nRandFrames)
                      for agent in agents])

            # prepare population for next gen
            trainer.applyScores(scoreList)
            trainer.evolve(tasks=[envName])  # go into next gen

            # track stats
            scoreStats = trainer.fitnessStats
            allScores.append(
                (scoreStats['min'], scoreStats['max'], scoreStats['average']))

            print(
                f"Gen: {gen}, Best Score: {scoreStats['max']}, Time: {str((time.time() - tStart)/3600)}"
            )

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    # With gens == 0 there are no stats to report.
    return trainer, (allScores[-1] if allScores else None)
예제 #4
0
    def test_pickle(self):
        """Save a trainer, load it back and compare both populations."""
        save_name = "test_trainer_save"

        original = Trainer(actions=self.dummy_actions)
        original.saveToFile(save_name)
        restored = loadTrainer(save_name)

        # The loaded trainer must have all the same teams and learners:
        # equal counts, and every original member present after loading.
        for population in ("teams", "learners"):
            self.assertEqual(len(getattr(original, population)),
                             len(getattr(restored, population)))
            for member in getattr(original, population):
                self.assertIn(member, getattr(restored, population))
    gameReps = opts.gameReps

    #Set up Magnus to destroy the webway
    print("Comment 1")
    if path.exists("Tzeentch") and path.exists("Magnus"):
        trainer = Trainer
        trainer = trainer.loadTrainer("Tzeentch")
        agents = trainer.getAgents()
        agent = Agent
        agent = agent.loadAgent("Magnus")
        print("yo as far as I understand the load was successful?")

    else:
        IQ = 0
        trainer = Trainer(actions=range(30),
                          teamPopSize=opts.popSize,
                          rTeamPopSize=opts.popSize,
                          sourceRange=310)
        agents = trainer.getAgents()
        agent = agents.pop()
        agent.saveToFile("Magnus")
        #psykerLevel = 0
        trainer.saveToFile("Tzeentch")

    agentScores = []
    curGen = 0
    #psykerLevel += agent.psykerLevel

    lastState = None

    # create a log file
    global logName
예제 #6
0
# import to do training
from tpg.trainer import Trainer
# import to run an agent (always needed)
from tpg.agent import Agent

# The algorithms require a vectorized environment to run
env = DummyVecEnv([lambda: GoNoGo()])

import time  # for tracking time

tStart = time.time()

# first create an instance of the TpgTrainer
# this creates the whole population and everything
# teamPopSize should realistically be at-least 100
trainer = Trainer(actions=range(2), teamPopSize=20, rTeamPopSize=20)

curScores = []  # hold scores in a generation
summaryScores = []  # record score summaries for each gen (min, max, avg)

# 5 generations isn't much (not even close), but some improvements
# should be seen.
for gen in range(5):  # generation loop
    curScores = []  # new list per gen

    agents = trainer.getAgents()

    while True:  # loop to go through agents
        teamNum = len(agents)
        agent = agents.pop()
        if agent is None:
예제 #7
0
    return np.ndarray.flatten(state)


# import to do training
from tpg.trainer import Trainer
# import to run an agent (always needed)
from tpg.agent import Agent

# Source: as adapted from https://github.com/Ryan-Amaral/PyTPG/blob/master/tpg_examples.ipynb
import time  # for tracking time
tStart = time.time()

# first create an instance of the TpgTrainer
# this creates the whole population and everything
# teamPopSize should realistically be at-least 100
trainer = Trainer(actions=range(env.action_space.n), teamPopSize=100)

curScores = []  # hold scores in a generation
summaryScores = []  # record score summaries for each gen (min, max, avg)

# 20 generations isn't much (not even close), but some improvements
# should be seen.
for gen in range(20):  # generation loop
    curScores = []  # new list per gen

    agents = trainer.getAgents()

    while True:  # loop to go through agents
        teamNum = len(agents)
        agent = agents.pop()
        if agent is None:
예제 #8
0
    def test_init(self):
        """Check that Trainer stores valid parameters and rejects invalid ones.

        Each pool on this test case is an iterable of entries whose first
        item is a candidate value and whose second item says whether that
        value is valid. A valid value must be readable back, unchanged, from
        the trainer attribute named like the keyword argument; an invalid
        value must make the constructor raise.
        """
        # Parameters that have a dedicated pool named after themselves.
        own_pool_params = (
            "teamPopSize",
            "rootBasedPop",
            "gap",
            "inputSize",
            "nRegisters",
            "initMaxTeamSize",
            "initMaxProgSize",
            "doElites",
            "rampancy",
            "operationSet",
            "traversal",
        )
        # Probability parameters all share ``self.probability_pool``.
        probability_params = (
            "pLrnDel",
            "pLrnAdd",
            "pLrnMut",
            "pProgMut",
            "pActMut",
            "pActAtom",
            "pInstDel",
            "pInstAdd",
            "pInstSwp",
            "pInstMut",
        )

        # (pool attribute on the test case, Trainer keyword/attribute name)
        cases = [(name, name) for name in own_pool_params]
        cases += [("probability_pool", name) for name in probability_params]

        for pool_name, param in cases:
            for cursor in getattr(self, pool_name):
                value, is_valid = cursor[0], cursor[1]
                if is_valid:  # If this input is valid
                    trainer = Trainer(actions=self.dummy_actions,
                                      **{param: value})
                    self.assertEqual(
                        value, getattr(trainer, param),
                        "Trainer did not store {}={!r}".format(param, value))
                else:  # This input is invalid, ensure it throws an exception
                    with self.assertRaises(Exception) as expected:
                        Trainer(actions=self.dummy_actions, **{param: value})
                    # Must come after the block: a statement following the
                    # raising call inside the ``with`` body is never reached.
                    self.assertIsNotNone(expected.exception)

        trainer = Trainer(actions=self.dummy_actions)

        # Ensure the right initial team pop size was created
        self.assertEqual(trainer.teamPopSize, len(trainer.teams))
        # Ensure there are learners
        self.assertGreater(len(trainer.learners), 0)
예제 #9
0
    global trainer
    global agents
    global agent
    global agentScores
    global curGen
    global lastState
    
    
    breezyIp = opts.breezyIp
    breezyPort = opts.breezyPort
    totalGens = opts.gens
    gameReps = opts.gameReps
    
    # set up of the TPG agent
    trainer = Trainer(actions=range(30), 
                      teamPopSize=opts.popSize, 
                      rTeamPopSize=opts.popSize,
                      sourceRange=310)
    agents = trainer.getAgents()
    agent = agents.pop()
    agentScores = []
    curGen = 0
    lastState = None
    
    # create a log file 
    global logName
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
    logName = "log-{}.txt".format(timestamp)

    # serve until force stop
    while True:
        pass
예제 #10
0
파일: extras.py 프로젝트: Ryan-Amaral/PyTPG
def runPopulationParallel(envName="Boxing-v0",
                          gens=1000,
                          popSize=360,
                          reps=3,
                          frames=18000,
                          processes=4,
                          nRandFrames=30,
                          rootBasedPop=True,
                          memType=None,
                          operationSet="full",
                          rampancy=(5, 5, 5),
                          traversal="team",
                          do_real=False):
    """Train a TPG population on a gym environment using worker processes.

    Args:
        envName: Id passed to ``gym.make``; also used as the task name when
            sorting agents and when evolving.
        gens: Number of generations to run.
        popSize: ``teamPopSize`` handed to the ``Trainer``.
        reps: Forwarded to ``runAgentParallel`` for every agent.
        frames: Forwarded to ``runAgentParallel`` for every agent.
        processes: Number of worker processes in the pool.
        nRandFrames: Forwarded to ``runAgentParallel`` for every agent.
        rootBasedPop: Forwarded unchanged to the ``Trainer`` constructor.
        memType: Forwarded unchanged to the ``Trainer`` constructor.
        operationSet: Forwarded unchanged to the ``Trainer`` constructor.
        rampancy: Forwarded unchanged to the ``Trainer`` constructor.
        traversal: Forwarded unchanged to the ``Trainer`` constructor.
        do_real: When true the trainer is created with ``actions=[1, 1]``
            instead of the environment's action count; the flag is also
            forwarded to ``runAgentParallel``.

    Returns:
        ``(trainer, stats)`` where ``stats`` is the ``(min, max, average)``
        tuple of the final generation, or ``None`` if no generation was run.
    """
    tStart = time.time()

    # Worker pools were seen to deadlock without the "spawn" start method:
    # https://pythonspeed.com/articles/python-multiprocessing/
    # set_start_method raises RuntimeError once a method is already fixed, so
    # only switch when needed; this keeps repeated calls from crashing.
    if mp.get_start_method(allow_none=True) != "spawn":
        set_start_method("spawn", force=True)

    print("creating atari environment")
    # The environment is only needed here to read the number of actions.
    env = gym.make(envName)
    try:
        acts = env.action_space.n
    finally:
        env.close()
    del env

    print("creating trainer")
    trainer = Trainer(actions=[1, 1] if do_real else acts,
                      teamPopSize=popSize,
                      rootBasedPop=rootBasedPop,
                      memType=memType,
                      operationSet=operationSet,
                      rampancy=rampancy,
                      traversal=traversal)

    trainer.configFunctions()

    allScores = []  # track all scores each generation
    champ = None  # champion team of the most recent generation

    print("running generations")
    # The context managers shut the manager and the worker pool down even if
    # a generation fails, so no helper processes are left behind.
    with mp.Manager() as man, \
            mp.Pool(processes=processes, maxtasksperchild=1) as pool:
        for gen in range(gens):  # do generations of training
            print("doing generation {}".format(gen))
            scoreList = man.list()

            # swap out agents only at start of generation
            agents = trainer.getAgents()

            try:
                # run the agents
                pool.map(runAgentParallel, [
                    (agent, envName, scoreList, reps, frames, nRandFrames,
                     do_real)
                    for agent in agents
                ])
            except Exception as mpException:
                print(
                    "Exception occured while running multiprocessing via pool.map!"
                )
                print(mpException)
                raise  # bare raise keeps the original traceback

            # prepare population for next gen
            print("Applying gen {} scores to agents".format(gen))
            trainer.applyScores(scoreList)
            print("Getting champion")
            champ = trainer.getAgents(sortTasks=[envName])[0].team
            print("Evolving population")
            trainer.evolve(tasks=[envName])  # go into next gen

            # track stats
            scoreStats = trainer.fitnessStats
            allScores.append(
                (scoreStats['min'], scoreStats['max'], scoreStats['average']))

            print(
                "teams: {}, rTeams: {}, learners: {}, Champ Teams: {}, Champ Learners: {}, Champ Instructions: {}."
                .format(
                    len(trainer.teams), len(trainer.rootTeams),
                    len(trainer.learners), len(getTeams(champ)),
                    len(getLearners(champ)),
                    learnerInstructionStats(getLearners(champ),
                                            trainer.operations)))

            print(
                f"Gen: {gen}, Best Score: {scoreStats['max']}, Avg Score: {scoreStats['average']}, Time: {str((time.time() - tStart)/3600)}"
            )

    # No champion exists when no generation was run.
    if champ is not None:
        print(pathDepths(champ))

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    # With gens == 0 there are no stats to report.
    return trainer, (allScores[-1] if allScores else None)