Example #1
0
def runPopulation(envName="Boxing-v0", gens=1000, popSize=360, reps=3,
        frames=18000, nRandFrames=30):
    """Serially train a TPG population on a single Gym environment.

    Each agent plays `reps` episodes of up to `frames` frames, acting
    randomly for the first `nRandFrames` frames of every episode.
    Returns the trainer and the (min, max, avg) stats of the final
    generation.
    """
    # Probe the environment for the size of its discrete action space.
    env = gym.make(envName)
    acts = env.action_space.n

    trainer = Trainer(actions=range(acts), teamPopSize=popSize)

    tStart = time.time()

    allScores = []  # one (min, max, avg) tuple per generation
    for gen in range(gens):
        agents = trainer.getAgents()

        while agents:  # evaluate every agent of the current generation
            agent = agents.pop()
            if agent.taskDone(envName):
                continue  # already has a score for this task

            score = 0
            for _ in range(reps):  # repetitions of the game
                state = env.reset()
                for frame in range(frames):
                    # act randomly at first for stochasticity; no reward
                    # is accumulated during these frames
                    if frame < nRandFrames:
                        state, reward, isDone, debug = env.step(env.action_space.sample())
                        continue

                    act = agent.act(getStateALE(np.array(state, dtype=np.int32)))
                    state, reward, isDone, debug = env.step(act)

                    score += reward  # accumulate reward across reps
                    if isDone:
                        break  # terminal state reached, end episode early

            agent.reward(score/reps, envName)

            print('Agent #' + str(agent.agentNum) +
                ' | Score: ' + str(score/reps))

        # generation fully evaluated; produce the next one
        trainer.evolve(tasks=[envName])

        # record fitness statistics for this generation
        scoreStats = trainer.fitnessStats
        allScores.append((scoreStats['min'], scoreStats['max'], scoreStats['average']))

        print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
        print('Gen: ' + str(gen))
        print('Results so far: ' + str(allScores))

    print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0],score[1],score[2])

    return trainer, allScores[-1]
Example #2
0
def train():
    """Run 100 generations of multiprocessed TPG training on VizDoom.

    Worker processes each create their own game instance; the scratch
    instance built here only validates the config. Saves the trainer to
    'trainer.tn' when done.
    """
    tStart = time.time()

    envName = 'deadly_corridor.cfg'

    # Build a throwaway game just to check the config loads; workers
    # construct their own instances inside runAgent().
    game = DoomGame()
    game.load_config(envName)
    game.set_sound_enabled(False)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()
    del game

    trainer = Trainer(actions=range(4), teamPopSize=30, rTeamPopSize=30)

    processes = 7
    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)

    allScores = []  # one (min, max, avg) tuple per generation

    for gen in range(100):
        # shared list the worker processes append their scores to
        scoreList = man.list()

        # agents are swapped out only at the start of each generation;
        # they hold no trainer reference, which multiprocessing requires
        agents = trainer.getAgents()

        # evaluate every agent in parallel: 1 rep, 2000 frames each
        pool.map(runAgent,
            [(agent, envName, scoreList, 1, 2000) for agent in agents])

        # workers cannot touch the trainer, so scores are applied here;
        # the task name itself is set inside runAgent()
        teams = trainer.applyScores(scoreList)
        trainer.evolve(tasks=[envName])  # produce the next generation

        # record fitness statistics for this generation
        scoreStats = trainer.fitnessStats
        allScores.append((scoreStats['min'], scoreStats['max'], scoreStats['average']))
        print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
        print('Gen: ' + str(gen))
        print('Results so far: ' + str(allScores))

    print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0],score[1],score[2])

    trainer.saveToFile('trainer.tn')
Example #3
0
def runPopulationParallel(envName="Boxing-v0",
                          gens=1000,
                          popSize=360,
                          reps=3,
                          frames=18000,
                          processes=4,
                          nRandFrames=30):
    """Train a TPG population on a Gym environment using a process pool.

    Agents are evaluated in parallel via runAgentParallel; scores flow
    back through a Manager list. Returns the trainer and the
    (min, max, avg) stats of the final generation.
    """
    tStart = time.time()

    # probe the environment once for its action count, then discard it
    env = gym.make(envName)
    acts = env.action_space.n
    del env

    trainer = Trainer(actions=range(acts), teamPopSize=popSize)

    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)

    allScores = []  # one (min, max, avg) tuple per generation

    for gen in range(gens):
        # shared list the worker processes write their scores into
        scoreList = man.list()

        # agents are swapped out only at the start of each generation
        agents = trainer.getAgents()

        # evaluate every agent in parallel
        jobs = [(agent, envName, scoreList, reps, frames, nRandFrames)
                for agent in agents]
        pool.map(runAgentParallel, jobs)

        # fold the worker scores back into the trainer and evolve
        teams = trainer.applyScores(scoreList)
        trainer.evolve(tasks=[envName])

        # record fitness statistics for this generation
        scoreStats = trainer.fitnessStats
        allScores.append(
            (scoreStats['min'], scoreStats['max'], scoreStats['average']))

        print(
            f"Gen: {gen}, Best Score: {scoreStats['max']}, Time: {str((time.time() - tStart)/3600)}"
        )

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    return trainer, allScores[-1]
Example #4
0
        # NOTE(review): this snippet is a fragment — the enclosing function
        # and outer loops that define env, agent, agents, gen, teamNum, state,
        # score, curScores, summaryScores and trainer start before the visible
        # code. Comments below describe only what this view shows.
        for i in range(500):  # run episodes that last 500 frames
            show_state(env, i, 'Assault', 'Gen #' + str(gen) + ', Team #' +
                       str(teamNum) + ', Score: ' + str(score))  # render env

            # get action from agent
            # must transform to at-least int-32 (for my getState to bitshift correctly)
            act = agent.act(getState(np.array(state, dtype=np.int32)))

            # feedback from env
            state, reward, isDone, debug = env.step(act)
            score += reward  # accumulate reward in score
            if isDone:
                break  # end early if losing state

        agent.reward(score)  # must reward agent (if didn't already score)

        curScores.append(score)  # store score

        # presumably `agents` is drained by an enclosing loop — TODO confirm
        if len(agents) == 0:
            break

    # at end of generation, make summary of scores
    summaryScores.append((min(curScores), max(curScores),
                          sum(curScores) / len(curScores)))  # min, max, avg
    trainer.evolve()

# final report after all generations
print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
print('Results:\nMin, Max, Avg')
for result in summaryScores:
    print(result[0], result[1], result[2])
Example #5
0
def runPopulationParallel(envName="Boxing-v0",
                          gens=1000,
                          popSize=360,
                          reps=3,
                          frames=18000,
                          processes=4,
                          nRandFrames=30,
                          rootBasedPop=True,
                          memType=None,
                          operationSet="full",
                          rampancy=(5, 5, 5),
                          traversal="team",
                          do_real=False):
    """Train a TPG population on an Atari Gym environment with a process pool.

    Parameters mirror the Trainer configuration; ``do_real`` switches the
    trainer to a two-dimensional real-valued action setup instead of the
    environment's discrete action count.

    Returns:
        (trainer, (min, max, avg)) — the trainer and the fitness stats of
        the final generation. Assumes ``gens >= 1``; with ``gens == 0``
        the final stats lookup would fail.

    Raises:
        Any exception raised by ``pool.map`` is logged and re-raised.
    """
    tStart = time.time()

    # Python multiprocessing can deadlock under the default "fork" start
    # method, so force "spawn":
    # https://pythonspeed.com/articles/python-multiprocessing/
    # NOTE(review): set_start_method raises RuntimeError if the start method
    # was already set, so this function is effectively single-use per process.
    set_start_method("spawn")

    print("creating atari environment")
    # probe the environment for its action count, then discard it
    env = gym.make(envName)
    acts = env.action_space.n
    del env

    print("creating trainer")
    if do_real:
        # real-valued action mode: two real action dimensions
        trainer = Trainer(actions=[1, 1],
                          teamPopSize=popSize,
                          rootBasedPop=rootBasedPop,
                          memType=memType,
                          operationSet=operationSet,
                          rampancy=rampancy,
                          traversal=traversal)
    else:
        trainer = Trainer(actions=acts,
                          teamPopSize=popSize,
                          rootBasedPop=rootBasedPop,
                          memType=memType,
                          operationSet=operationSet,
                          rampancy=rampancy,
                          traversal=traversal)

    trainer.configFunctions()

    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)

    allScores = []  # one (min, max, avg) tuple per generation

    print("running generations")
    for gen in range(gens):  # do generations of training
        print("doing generation {}".format(gen))
        # shared list the worker processes write their scores into
        scoreList = man.list()

        # swap out agents only at the start of a generation
        agents = trainer.getAgents()

        try:
            # evaluate every agent in parallel
            pool.map(runAgentParallel, [
                (agent, envName, scoreList, reps, frames, nRandFrames, do_real)
                for agent in agents
            ])

        except Exception as mpException:
            print(
                "Exception occured while running multiprocessing via pool.map!"
            )
            print(mpException)
            raise  # bare raise preserves the original traceback

        # prepare population for next gen
        print("Applying gen {} scores to agents".format(gen))
        trainer.applyScores(scoreList)
        print("Getting champion")
        champ = trainer.getAgents(sortTasks=[envName])[0].team
        print("Evolving population")
        trainer.evolve(tasks=[envName])  # go into next gen

        # track stats
        scoreStats = trainer.fitnessStats
        allScores.append(
            (scoreStats['min'], scoreStats['max'], scoreStats['average']))

        print(
            "teams: {}, rTeams: {}, learners: {}, Champ Teams: {}, Champ Learners: {}, Champ Instructions: {}."
            .format(
                len(trainer.teams), len(trainer.rootTeams),
                len(trainer.learners), len(getTeams(champ)),
                len(getLearners(champ)),
                learnerInstructionStats(getLearners(champ),
                                        trainer.operations)))

        print(
            f"Gen: {gen}, Best Score: {scoreStats['max']}, Avg Score: {scoreStats['average']}, Time: {str((time.time() - tStart)/3600)}"
        )

    # champ is bound inside the loop, so this requires gens >= 1
    print(pathDepths(champ))

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    return trainer, allScores[-1]