Example #1
# Imports assumed by this snippet (the original file header is not shown):
import time
import multiprocessing as mp
from vizdoom import DoomGame, ScreenResolution
from tpg.trainer import Trainer

def train():
    tStart = time.time()
    # stack_size=4
    envName = 'deadly_corridor.cfg'
    game = DoomGame()
    game.load_config(envName)
    game.set_sound_enabled(False)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()
    # acts = game.get_available_buttons_size()
    del game

    trainer = Trainer(actions=range(4), teamPopSize=30, rTeamPopSize=30)
    # trainer = loadTrainer('trainer.tn')
    processes = 7
    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)

    allScores = [] # track all scores each generation

    for gen in range(100): # do 100 generations of training
        scoreList = man.list()

        # swap in fresh agents at the start of each generation; the agents
        # don't need a reference to the trainer when multiprocessing
        agents = trainer.getAgents()

        # run the agents; runAgent is defined elsewhere in the source file
        # (a sketch of it follows this example)
        pool.map(runAgent,
            [(agent, envName, scoreList, 1, 2000)
            for agent in agents])

        # apply scores through the trainer; this step is required when
        # multiprocessing because the agents can't refer to the trainer
        teams = trainer.applyScores(scoreList)
        # the task name here must match the one the scores were tagged
        # with in runAgent()
        trainer.evolve(tasks=[envName])  # go into next gen

        # trainer.fitnessStats gives per-generation stats without manual tracking
        scoreStats = trainer.fitnessStats
        allScores.append((scoreStats['min'], scoreStats['max'], scoreStats['average']))
        print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
        print('Gen: ' + str(gen))
        print('Results so far: ' + str(allScores))

    print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0],score[1],score[2])

    trainer.saveToFile('trainer.tn')
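
The worker function mapped over the pool is not shown on this page. Below is a minimal sketch of what runAgent could look like, assuming the agent interface used in the PyTPG examples (agent.act on a flattened observation, agent.reward to tag the score with the task name, and (team id, outcomes) pairs for applyScores); the action-to-button mapping is purely illustrative.

import numpy as np
from vizdoom import DoomGame

def runAgent(args):
    agent, envName, scoreList, numEpisodes, numFrames = args

    game = DoomGame()
    game.load_config(envName)
    game.set_window_visible(False)
    game.init()

    # hypothetical mapping from the 4 discrete actions to ViZDoom button states
    buttonLists = [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]

    scoreTotal = 0
    for ep in range(numEpisodes):
        game.new_episode()
        for frame in range(numFrames):
            if game.is_episode_finished():
                break
            state = game.get_state().screen_buffer
            act = agent.act(state.flatten())  # assumed to return an action index
            game.make_action(buttonLists[act])
        scoreTotal += game.get_total_reward()
    game.close()

    # tag the score with the same task name later passed to trainer.evolve()
    agent.reward(scoreTotal / numEpisodes, envName)
    scoreList.append((agent.team.id, agent.team.outcomes))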
Example #2
# Imports assumed by this snippet:
import time
import multiprocessing as mp
import gym
from tpg.trainer import Trainer

def runPopulationParallel(envName="Boxing-v0",
                          gens=1000,
                          popSize=360,
                          reps=3,
                          frames=18000,
                          processes=4,
                          nRandFrames=30):
    tStart = time.time()

    # get num actions
    env = gym.make(envName)
    acts = env.action_space.n
    del env

    trainer = Trainer(actions=range(acts), teamPopSize=popSize)

    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)

    allScores = []  # track all scores each generation

    for gen in range(gens):  # do generations of training
        scoreList = man.list()

        agents = trainer.getAgents()  # swap out agents only at start of generation

        # run the agents; runAgentParallel is defined elsewhere
        # (a sketch of it follows this example)
        pool.map(runAgentParallel,
                 [(agent, envName, scoreList, reps, frames, nRandFrames)
                  for agent in agents])

        # prepare population for next gen
        teams = trainer.applyScores(scoreList)
        trainer.evolve(tasks=[envName])  # go into next gen

        # track stats
        scoreStats = trainer.fitnessStats
        allScores.append(
            (scoreStats['min'], scoreStats['max'], scoreStats['average']))

        print(f"Gen: {gen}, Best Score: {scoreStats['max']}, "
              f"Time (Hours): {(time.time() - tStart) / 3600}")

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    return trainer, allScores[-1]
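
As in Example #1, the worker is defined elsewhere in the file. Below is a sketch of runAgentParallel under the same assumptions about the agent interface, using the classic gym step API that matches the Boxing-v0 environment id; taking the first nRandFrames frames at random is an inference from the parameter name.

import numpy as np
import gym

def runAgentParallel(args):
    agent, envName, scoreList, reps, frames, nRandFrames = args

    env = gym.make(envName)
    scoreTotal = 0
    for rep in range(reps):
        state = env.reset()
        score = 0
        for frame in range(frames):
            if frame < nRandFrames:
                act = env.action_space.sample()  # random warm-up frames
            else:
                act = agent.act(np.asarray(state, dtype=float).flatten())
            state, reward, done, _ = env.step(act)
            score += reward
            if done:
                break
        scoreTotal += score
    env.close()

    agent.reward(scoreTotal / reps, envName)
    scoreList.append((agent.team.id, agent.team.outcomes))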
Example #3
File: extras.py  Project: Ryan-Amaral/PyTPG
# Imports assumed by this snippet; the tpg.utils module path is an
# assumption based on the helper names used below:
import time
import multiprocessing as mp
from multiprocessing import set_start_method
import gym
from tpg.trainer import Trainer
from tpg.utils import getTeams, getLearners, learnerInstructionStats, pathDepths

def runPopulationParallel(envName="Boxing-v0",
                          gens=1000,
                          popSize=360,
                          reps=3,
                          frames=18000,
                          processes=4,
                          nRandFrames=30,
                          rootBasedPop=True,
                          memType=None,
                          operationSet="full",
                          rampancy=(5, 5, 5),
                          traversal="team",
                          do_real=False):
    tStart = time.time()

    # Python really is something special... sometimes it just deadlocks... ¯\_(ツ)_/¯
    # https://pythonspeed.com/articles/python-multiprocessing/
    # ("spawn" avoids fork-related deadlocks; note that set_start_method
    # raises if the start method was already set in this process)
    set_start_method("spawn")

    print("creating atari environment")
    # get num actions
    env = gym.make(envName)
    acts = env.action_space.n
    del env

    print("creating trainer")
    if do_real:
        trainer = Trainer(actions=[1, 1],
                          teamPopSize=popSize,
                          rootBasedPop=rootBasedPop,
                          memType=memType,
                          operationSet=operationSet,
                          rampancy=rampancy,
                          traversal=traversal)
    else:
        trainer = Trainer(actions=acts,
                          teamPopSize=popSize,
                          rootBasedPop=rootBasedPop,
                          memType=memType,
                          operationSet=operationSet,
                          rampancy=rampancy,
                          traversal=traversal)

    trainer.configFunctions()

    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)

    allScores = []  # track all scores each generation

    print("running generations")
    for gen in range(gens):  # do generations of training
        print("doing generation {}".format(gen))
        scoreList = man.list()

        agents = trainer.getAgents()  # swap out agents only at start of generation

        try:
            # run the agents; runAgentParallel is defined elsewhere in extras.py
            pool.map(runAgentParallel, [
                (agent, envName, scoreList, reps, frames, nRandFrames, do_real)
                for agent in agents
            ])
        except Exception as mpException:
            print("Exception occurred while running multiprocessing via pool.map!")
            print(mpException)
            raise

        # prepare population for next gen
        print("Applying gen {} scores to agents".format(gen))
        teams = trainer.applyScores(scoreList)
        print("Getting champion")
        champ = trainer.getAgents(sortTasks=[envName])[0].team
        print("Evolving population")
        trainer.evolve(tasks=[envName])  # go into next gen

        # track stats
        scoreStats = trainer.fitnessStats
        allScores.append(
            (scoreStats['min'], scoreStats['max'], scoreStats['average']))


        print(
            "teams: {}, rTeams: {}, learners: {}, Champ Teams: {}, Champ Learners: {}, Champ Instructions: {}."
            .format(
                len(trainer.teams), len(trainer.rootTeams),
                len(trainer.learners), len(getTeams(champ)),
                len(getLearners(champ)),
                learnerInstructionStats(getLearners(champ),
                                        trainer.operations)))

        print(f"Gen: {gen}, Best Score: {scoreStats['max']}, "
              f"Avg Score: {scoreStats['average']}, "
              f"Time (Hours): {(time.time() - tStart) / 3600}")

    # path depth stats of the final champion's team graph
    print(pathDepths(champ))

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    return trainer, allScores[-1]
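
A hypothetical driver for this function. The __main__ guard matters because the function sets the "spawn" start method, which starts each worker in a fresh interpreter that re-imports the module; the argument values and checkpoint file name are illustrative.

if __name__ == '__main__':
    trainer, (minScore, maxScore, avgScore) = runPopulationParallel(
        envName='Boxing-v0',
        gens=100,
        popSize=120,
        processes=8)
    print('final generation min/max/avg:', minScore, maxScore, avgScore)
    trainer.saveToFile('boxing-trainer.tn')  # resume later with loadTrainer()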