예제 #1
0
def main():
    """Train a TPG population, replay its champion, and plot the progress.

    For each of ``GENERATIONS`` rounds, every agent supplied by the trainer
    is scored as the sum of ``EVALUATIONS`` calls to ``evaluateAgent`` and
    rewarded with that total. The round's min / max / average are printed
    and recorded, then the trainer evolves. Afterwards the agent returned by
    ``getBestAgent`` is replayed ``FINAL_RUNS`` times with ``graphics=True``
    and the recorded statistics are plotted through ``plt``.
    """
    # The two actions are the agent's predictions:
    #   1 -> right box will fall off the edge
    #   0 -> right box will not fall off
    trainer = TpgTrainer(actions=[0, 1], teamPopSize=50)

    lows, highs, means = [], [], []  # one entry per generation

    for gen in range(GENERATIONS):
        print("Generation: ", gen + 1, "/", GENERATIONS)
        gen_scores = []

        # getNextAgent() returns None once no agents are left for this round.
        agent = trainer.getNextAgent()
        while agent is not None:
            total = sum(evaluateAgent(agent) for _ in range(EVALUATIONS))
            agent.reward(total)
            gen_scores.append(total)
            agent = trainer.getNextAgent()

        # Each statistic is computed once and reused for printing and plotting.
        lowest = min(gen_scores)
        highest = max(gen_scores)
        mean = sum(gen_scores) / len(gen_scores)
        print("Min:", lowest, "   Max:", highest, "   Avg:", mean,
              "(out of " + str(EVALUATIONS) + ")\n")

        lows.append(lowest)
        highs.append(highest)
        means.append(mean)

        trainer.evolve()

    # Pick the champion after all generations have run.
    best_agent, best_score = getBestAgent(trainer)
    print("Best agent's score:", best_score, "/", EVALUATIONS)

    for run in range(FINAL_RUNS):
        # '\r' returns to the start of the line so the counter is overwritten.
        print("Final run: ", run + 1, "/", FINAL_RUNS, end='\r')
        evaluateAgent(best_agent, graphics=True)

    # Plot the three per-generation curves on a y-axis of 0..EVALUATIONS.
    gen_numbers = range(1, GENERATIONS + 1)
    plt.gca().set_ylim([0, EVALUATIONS])
    for series, label in ((lows, "min"), (highs, "max"), (means, "avg")):
        plt.plot(gen_numbers, series, label=label)

    plt.xlabel("generation")
    plt.ylabel("score")
    plt.legend()
    plt.show()
def runTpg():
    """Train TPG on the single environment named by ``options.envName``.

    A temporary environment is created only to read the size of its action
    space. The loop then runs until the population's ``curGen`` reaches
    ``options.generations``. Each generation maps ``runAgent`` over all
    agents through the worker pool, applies the collected scores, evolves
    on that one task, and appends one comma-separated line of statistics
    to ``logFileTpg``.
    """
    env_name = options.envName

    # Throwaway environment: only its action count is needed.
    probe_env = gym.make(env_name)
    trainer = TpgTrainer(actions=range(probe_env.action_space.n),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)
    probe_env.close()

    print('Playing Game: ' + env_name)

    # Bounded training: stops once curGen reaches options.generations.
    while trainer.populations[None].curGen < options.generations:
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        # Manager-backed list that is handed to every runAgent call.
        scoreList = man.list()

        # One argument tuple per agent; the pool runs them all on the env.
        jobs = [(agent, env_name, scoreList, options.trainEps,
                 options.trainFrames, None)
                for agent in trainer.getAllAgents(skipTasks=[env_name],
                                                  noRef=True)]
        pool.map(runAgent, jobs)

        trainer.applyScores(scoreList)

        # Evolve only after every agent has been scored.
        trainer.evolve(tasks=[env_name], elitistTasks=[env_name])

        # Append this generation's fitness summary to the log.
        bestTeam = trainer.getBestTeams(tasks=[env_name])[0]
        with open(logFileTpg, 'a') as f:
            population = trainer.populations[None]
            stats = population.scoreStats
            fields = [
                population.curGen,
                (time.time() - tstart) / 3600,  # hours since tstart
                env_name,
                stats['min'],
                stats['max'],
                stats['average'],
                len(bestTeam.getRootTeamGraph()[0]),
                bestTeam.uid,
                len(population.teams),
                len(population.rootTeams),
            ]
            f.write(','.join(map(str, fields)) + '\n')
예제 #3
0
# Script set-up: shared multiprocessing objects, the game rotation, the TPG
# trainer and the worker pool, followed by the start of the generation loop.
tStart = time.time()  # wall-clock start of the run
processes = 2  # how many to run concurrently
m = mp.Manager()  # used below to create one score queue per generation

# NOTE(review): 'gvgai-testgame2-lvl0-v0' is listed twice; the second entry
# was presumably meant to be 'gvgai-testgame2-lvl1-v0' — confirm.
allGames = [
    'gvgai-testgame1-lvl0-v0', 'gvgai-testgame1-lvl1-v0',
    'gvgai-testgame2-lvl0-v0', 'gvgai-testgame2-lvl0-v0',
    'gvgai-testgame3-lvl0-v0', 'gvgai-testgame3-lvl1-v0'
]
# This rebinding replaces the list above, so only Assault-v0 is ever played.
allGames = ['Assault-v0']

# Games are consumed from a shuffled copy so allGames itself stays intact.
gameQueue = list(allGames)
random.shuffle(gameQueue)

# NOTE(review): range(6) presumably matches the action count of the game
# being played — TODO confirm against the environment.
trainer = TpgTrainer(actions=range(6), teamPopSizeInit=360)

# Each worker process runs limit_cpu() once when it starts.
pool = mp.Pool(processes=processes, initializer=limit_cpu)

summaryScores = []  # record score summaries for each gen (min, max, avg)

for gen in range(100):  # generation loop
    # A fresh queue per generation; per the original note it holds agents
    # as they finish so their scores can be applied afterwards.
    scoreQueue = m.Queue()

    # pick the game for this generation
    game = gameQueue.pop()  # take out last game
    print('playing on', game)
    # refill and reshuffle the rotation once every game has been used
    if len(gameQueue) == 0:
        gameQueue = list(allGames)
        random.shuffle(gameQueue)
예제 #4
0
    'gvgai-testgame1-lvl0-v0', 'gvgai-testgame1-lvl1-v0',
    'gvgai-testgame2-lvl0-v0', 'gvgai-testgame2-lvl0-v0',
    'gvgai-testgame3-lvl0-v0', 'gvgai-testgame3-lvl1-v0'
]
# Script set-up: per-worker environments, the game rotation and the TPG
# trainer, followed by the start of the generation loop.
# This rebinding replaces whatever game list was assigned just before it.
allGames = ['Assault-v0']

# envs maps each game name to a list holding one environment per worker.
envs = {}
for game in allGames:
    envs[game] = []
    for w in range(workers):  # each process needs its own environment
        envs[game].append(gym.make(game))

# Games are consumed from a shuffled copy so allGames itself stays intact.
gameQueue = list(allGames)
random.shuffle(gameQueue)

# NOTE(review): range(6) presumably matches the action count of the game
# being played — TODO confirm against the environment.
trainer = TpgTrainer(actions=range(6), teamPopSizeInit=50)

summaryScores = []  # record score summaries for each gen (min, max, avg)

for gen in range(100):  # generation loop
    envQueue = []  # hold envs for current gen

    # pick this generation's game and queue up its per-worker environments
    game = gameQueue.pop()  # take out last game
    print('playing on', game)
    for w in range(workers):
        envQueue.append(envs[game][w])
    # refill and reshuffle the rotation once every game has been used
    if len(gameQueue) == 0:
        gameQueue = list(allGames)
        random.shuffle(gameQueue)
예제 #5
0
# Script set-up: one environment per game, the game rotation and the TPG
# trainer, followed by the start of the generation loop.
# NOTE(review): 'gvgai-testgame2-lvl0-v0' is listed twice; the second entry
# was presumably meant to be 'gvgai-testgame2-lvl1-v0' — confirm.
allGames = [
    'gvgai-testgame1-lvl0-v0', 'gvgai-testgame1-lvl1-v0',
    'gvgai-testgame2-lvl0-v0', 'gvgai-testgame2-lvl0-v0',
    'gvgai-testgame3-lvl0-v0', 'gvgai-testgame3-lvl1-v0'
]
# This rebinding replaces the list above, so only Assault-v0 is ever played.
allGames = ['Assault-v0']

# envs maps each game name to a single environment instance.
envs = {}
for game in allGames:
    envs[game] = gym.make(game)

# Games are consumed from a shuffled copy so allGames itself stays intact.
gameQueue = list(allGames)
random.shuffle(gameQueue)

# NOTE(review): range(6) presumably matches the action count of the game
# being played — TODO confirm against the environment.
trainer = TpgTrainer(actions=range(6), teamPopSizeInit=360)

# Redundant with the reset at the top of the generation loop below.
curScores = []  # hold scores in a generation
summaryScores = []  # record score summaries for each gen (min, max, avg)

for gen in range(100):  # generation loop
    curScores = []  # new list per gen

    # pick this generation's game and its environment
    game = gameQueue.pop()  # take out last game
    print('playing on', game)
    env = envs[game]
    # refill and reshuffle the rotation once every game has been used
    if len(gameQueue) == 0:
        gameQueue = list(allGames)
        random.shuffle(gameQueue)
예제 #6
0
import multiprocessing as mp
import pickle
import time

# Script set-up: worker pool, shared-memory manager, one TPG population per
# environment, and the CSV header of the run log.
# maxtasksperchild=1 makes the pool replace each worker after a single task;
# limit_cpu() runs once in every worker as it starts.
pool = mp.Pool(processes=15, initializer=limit_cpu, maxtasksperchild=1)
man = mp.Manager()  # manager for shared memory lists

allEnvNames = [
    'Alien-v0', 'Asteroids-v0', 'Atlantis-v0', 'BankHeist-v0', 'BattleZone-v0',
    'Bowling-v0', 'Boxing-v0', 'Centipede-v0'
]

# create TPG
# NOTE(review): range(18) is presumably the full Atari action set shared by
# all of the environments above — TODO confirm.
trainer = TpgTrainer(actions=range(18),
                     teamPopSize=200,
                     rTeamPopSize=200,
                     maxProgramSize=128,
                     singlePop=False)

tstart = time.time()  # wall-clock start of the run

# create populations, one per environment, named after the environment
for envName in allEnvNames:
    trainer.createNewPopulation(popName=envName)

# Opened in append mode, so the header line is added again on every run that
# reuses this log file.
logName = 'sgp-log-8-pops.txt'
with open(logName, 'a') as f:
    f.write(
        'tpgGen,hoursElapsed,envName,tpgMin,tpgMax,tpgAvg,eliteSize,eliteUid\n'
    )
def ggpTrainAllAtOnce():
    """Train one TPG population on several environments, generation by generation.

    The loop has no exit condition. Each generation it:

    1. Chooses the fitness environments. When
       ``0 < options.fitGamesNum < options.numGames`` one environment not
       currently selected is added at random, and the oldest is dropped once
       more than ``fitGamesNum`` are held. Otherwise all of ``allEnvNames``
       are used.
    2. Runs every agent on each fitness environment through the worker pool,
       applies the scores and appends a per-environment line to
       ``logFileGameScoresName``.
    3. Calls ``multiEvolve`` with the combined task list weighted 0.5 and the
       remaining 0.5 split evenly across the individual environments.
    4. Appends a fitness line to ``logFileFitnessName`` and pickles the
       trainer to ``trainerFileName``.
    5. Every 10 generations appends each root team's outcomes to
       ``logFileOutcomesName``; every ``options.champEvalGen`` generations
       calls ``champEval``.
    """
    trainer = TpgTrainer(actions=range(18),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    envNamesSrt = sorted(allEnvNames)  # handed to champEval for reporting

    fitnessEnvs = []

    while True:  # train indefinitely
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        if 0 < options.fitGamesNum < options.numGames:
            # Add one environment that is not already selected.
            candidates = set(allEnvNames) - set(fitnessEnvs)
            fitnessEnvs.append(random.choice(list(candidates)))
            if len(fitnessEnvs) > options.fitGamesNum:
                del fitnessEnvs[0]  # drop the oldest selection
        else:
            fitnessEnvs = list(allEnvNames)

        for envName in fitnessEnvs:
            print('Playing Game: ' + envName)

            scoreList = man.list()

            # One argument tuple per agent; the pool runs them all on the env.
            jobs = [(agent, envName, scoreList, options.trainEps,
                     options.trainFrames, None)
                    for agent in trainer.getAllAgents(skipTasks=[envName],
                                                      noRef=True)]
            pool.map(runAgent, jobs)

            trainer.applyScores(scoreList)

            # Log this environment's results.
            scoreStats = trainer.getTaskScores(envName)
            bestTeam = trainer.getBestTeams(tasks=[envName])[0]
            with open(logFileGameScoresName, 'a') as f:
                row = [
                    trainer.populations[None].curGen,
                    (time.time() - tstart) / 3600,  # hours since tstart
                    envName,
                    scoreStats['min'],
                    scoreStats['max'],
                    scoreStats['average'],
                    len(bestTeam.getRootTeamGraph()[0]),
                    bestTeam.uid,
                ]
                f.write(','.join(map(str, row)) + '\n')

        # Evolve once all environments have been played: the first task group
        # is every fitness environment together, then each one on its own.
        task_groups = [fitnessEnvs] + [[en] for en in fitnessEnvs]
        group_weights = [0.5] + [0.5 / len(fitnessEnvs) for _ in fitnessEnvs]
        trainer.multiEvolve(tasks=task_groups,
                            weights=group_weights,
                            fitMethod='min',
                            elitistTasks=allEnvNames)

        population = trainer.populations[None]

        # Log the generation's fitness summary.
        bestTeam = trainer.getBestTeams(tasks=fitnessEnvs)[0]
        with open(logFileFitnessName, 'a') as f:
            stats = population.scoreStats
            row = [
                population.curGen,
                (time.time() - tstart) / 3600,
                '/'.join(sorted(fitnessEnvs)),
                stats['min'],
                stats['max'],
                stats['average'],
                len(bestTeam.getRootTeamGraph()[0]),
                bestTeam.uid,
                len(population.teams),
                len(population.rootTeams),
            ]
            f.write(','.join(map(str, row)) + '\n')

        # Save the model after every generation.
        with open(trainerFileName, 'wb') as f:
            pickle.dump(trainer, f)

        # Every 10 generations record each root team's outcome per
        # environment; '-999999' stands in for environments with no outcome.
        if population.curGen % 10 == 0:
            for rt in population.rootTeams:
                with open(logFileOutcomesName, 'a') as f:
                    row = [
                        population.curGen,
                        (time.time() - tstart) / 3600,
                        rt.uid,
                    ]
                    row.extend(rt.outcomes.get(name, '-999999')
                               for name in allEnvNames)
                    f.write(','.join(map(str, row)) + '\n')

        # Every options.champEvalGen generations evaluate the top agents on
        # all games.
        if population.curGen % options.champEvalGen == 0:
            champEval(envNamesSrt,
                      trainer,
                      logFileChampionsName,
                      pool,
                      man,
                      tstart,
                      frames=options.testFrames,
                      eps=options.testEps)
예제 #8
0
    env.close()
    agent.reward(scoreTotal, envName + '-' + str(numFrames))
    scoreList.append((agent.getUid(), agent.getOutcomes()))


# https://stackoverflow.com/questions/42103367/limit-total-cpu-usage-in-python-multiprocessing/42130713
def limit_cpu():
    """Lower the calling process's scheduling priority via psutil.

    Sets the niceness of the current process (looked up by ``os.getpid()``)
    to 10.

    NOTE(review): 10 is a Unix niceness value; psutil documents priority
    class constants for Windows — confirm the target platform.
    """
    psutil.Process(os.getpid()).nice(10)


# Script set-up: create or restore the trainer, then start the worker pool
# and the bookkeeping used by the training loop.
envName = 'Boxing-v0'

if options.curGen == 0:
    # Fresh run: a temporary environment is made only to read its action count.
    tmpEnv = gym.make(envName)
    trainer = TpgTrainer(actions=range(tmpEnv.action_space.n), teamPopSize=360)
    tmpEnv.close()
else:
    # Resumed run: restore the previously saved trainer.
    # NOTE: unpickling can execute arbitrary code, so only load a model file
    # that this project wrote itself.
    with open('saved-model-sgp.pkl', 'rb') as f:
        trainer = pickle.load(f)

processes = 2
# Workers are replaced after 5 tasks each; limit_cpu() runs in every new worker.
pool = mp.Pool(processes=processes, initializer=limit_cpu, maxtasksperchild=5)
man = mp.Manager()

allScores = []  # track all scores each generation

tStart = time.time()  # wall-clock start of the run

curGen = options.curGen  # generation of tpg
    agent.reward(score)
    scoreList.append((agent.getUid(), agent.getOutcomes()))


# https://stackoverflow.com/questions/42103367/limit-total-cpu-usage-in-python-multiprocessing/42130713
def limit_cpu():
    """Lower the calling process's scheduling priority via psutil.

    Sets the niceness of the current process (looked up by ``os.getpid()``)
    to 10.

    NOTE(review): 10 is a Unix niceness value; psutil documents priority
    class constants for Windows — confirm the target platform.
    """
    psutil.Process(os.getpid()).nice(10)


# Script set-up: restore or create the trainer, then start the worker pool,
# the bookkeeping lists and the timestamped log file name.
if options.cont:
    # Continue a previous run from the saved trainer.
    # NOTE: unpickling can execute arbitrary code, so only load a model file
    # that this project wrote itself.
    with open('saved-model-1.pkl', 'rb') as f:
        trainer = pickle.load(f)
else:
    # NOTE(review): actions is an int here together with an actionRange
    # tuple, presumably 19 real-valued actions spanning -1.0..1.0 with the
    # third value as a step — TODO confirm against TpgTrainer.
    trainer = TpgTrainer(actions=19,
                         actionRange=(-1.0, 1.0, 0.35),
                         teamPopSize=360,
                         maxProgramSize=256)

processes = 7
# NOTE(review): limit_cpu is defined above but is not passed as the pool
# initializer here — confirm that is intentional.
pool = mp.Pool(processes=processes, maxtasksperchild=2)
man = mp.Manager()

allScores = []  # track all scores each generation

tStart = time.time()  # wall-clock start of the run

# Log file name carries the start time at minute resolution.
logFileName = 'train-log-' + datetime.datetime.now().strftime(
    "%Y-%m-%d-%H-%M") + '.txt'

while True:  # do generations with no end
    scoreList = man.list()