def runTpg():

    # make a throwaway env just to read the size of the action space
    tmpEnv = gym.make(options.envName)
    # create the TPG trainer
    trainer = TpgTrainer(actions=range(tmpEnv.action_space.n),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    tmpEnv.close()

    print('Playing Game: ' + options.envName)

    # train until the configured number of generations is reached
    while trainer.populations[None].curGen < options.generations:
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        scoreList = man.list()

        # run all agents on env
        pool.map(runAgent, [(agent, options.envName, scoreList,
                             options.trainEps, options.trainFrames, None)
                            for agent in trainer.getAllAgents(
                                skipTasks=[options.envName], noRef=True)])

        trainer.applyScores(scoreList)

        # do evolution after all envs played
        trainer.evolve(tasks=[options.envName], elitistTasks=[options.envName])

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=[options.envName])[0]
        with open(logFileTpg, 'a') as f:
            f.write(
                str(trainer.populations[None].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' + options.envName +
                ',' + str(trainer.populations[None].scoreStats['min']) + ',' +
                str(trainer.populations[None].scoreStats['max']) + ',' +
                str(trainer.populations[None].scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + ',' +
                str(len(trainer.populations[None].teams)) + ',' +
                str(len(trainer.populations[None].rootTeams)) + '\n')
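
Every snippet in this listing maps a runAgent helper over the agent list, but the helper itself is never shown. Below is a minimal sketch of what such a worker could look like for the tuple layout used in runTpg above, i.e. (agent, envName, scoreList, numEpisodes, numFrames, visual). The agent methods act and reward, and the shape of the entries appended to scoreList, are assumptions about the TPG agent API rather than anything confirmed by this excerpt (team.uid and team.outcomes do appear elsewhere in the listing); the old-style gym step API matches the gym.make usage above.

import gym

# Hypothetical worker for the (agent, envName, scoreList, numEpisodes,
# numFrames, visual) tuples built in runTpg above.
def runAgent(args):
    agent, envName, scoreList, numEpisodes, numFrames, _ = args

    env = gym.make(envName)
    scoreTotal = 0
    for _ in range(numEpisodes):
        state = env.reset()
        for _ in range(numFrames):
            action = agent.act(state)  # assumed agent API
            state, reward, done, _ = env.step(action)
            scoreTotal += reward
            if done:
                break
    env.close()

    scoreTotal /= numEpisodes  # average score over episodes
    agent.reward(scoreTotal, envName)  # assumed agent API
    scoreList.append((agent.team.uid, agent.team.outcomes))  # assumed fields
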
Example #2
    # take the next game off the end of the queue
    game = gameQueue.pop()
    print('playing on', game)
    # refill and reshuffle the game list once it is exhausted
    if len(gameQueue) == 0:
        gameQueue = list(allGames)
        random.shuffle(gameQueue)

    # tasks = [str(envs[game][0].env)]

    # run generation
    pool.map(
        runAgent,
        [
            (agent, game, scoreQueue)  #(agent, envQueue, scoreQueue)
            for agent in trainer.getAllAgents(skipTasks=[])
        ])

    scores = []  # drain the score queue into a plain list
    while not scoreQueue.empty():
        scores.append(scoreQueue.get())

    # save the model before every evolve, in case something goes wrong mid-generation
    with open('gvgai-model-1be.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # apply scores
    trainer.applyScores(scores)
    trainer.evolve()  # go into next gen

    # save model after every gen
Example #3
    # take the next game and queue one env instance of it per worker
    game = gameQueue.pop()
    print('playing on', game)
    for w in range(workers):
        envQueue.append(envs[game][w])
    # refill and reshuffle the game list once it is exhausted
    if len(gameQueue) == 0:
        gameQueue = list(allGames)
        random.shuffle(gameQueue)

    # tasks = [str(envs[game][0].env)]

    # submit every agent to the thread pool; leaving the with-block waits
    # for all runAgent calls to finish
    with ThreadPoolExecutor(max_workers=workers) as executor:
        for agent in trainer.getAllAgents():
            executor.submit(runAgent, agent, envQueue)

    trainer.evolve()  # go into next gen

    # save model after every gen
    with open('gvgai-model-1.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # at end of generation, make summary of scores
    summaryScores.append((trainer.scoreStats['min'], trainer.scoreStats['max'],
                          trainer.scoreStats['average']))  # min, max, avg

    print('Time Taken (Seconds): ' + str(time.time() - tStart))
    print('Results so far: ' + str(summaryScores))
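
Both GVGAI snippets pickle the whole trainer every generation, so an interrupted run can be resumed by loading the checkpoint instead of building a fresh TpgTrainer. A minimal sketch, reusing the filename from the snippet above (trainer.curGen is the same attribute read later in this listing):

import pickle

# reload the checkpointed trainer and pick up where the run left off
with open('gvgai-model-1.pkl', 'rb') as f:
    trainer = pickle.load(f)

print('resuming at generation', trainer.curGen)
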
Example #4
    f.write(
        'tpgGen,hoursElapsed,envName,tpgMin,tpgMax,tpgAvg,eliteSize,eliteUid\n'
    )

# run 300 generations, training on every game each generation
while trainer.populations[allEnvNames[0]].curGen < 300:
    print('TPG Gen: ' + str(trainer.populations[allEnvNames[0]].curGen))
    for envName in allEnvNames:  # train on each env
        print('Playing Game: ' + envName)

        scoreList = man.list()

        # run all agents on env
        pool.map(runAgent,
                 [(agent, envName, scoreList, 3, 18000, None)
                  for agent in trainer.getAllAgents(
                      skipTasks=[envName], noRef=True, popName=envName)])

        trainer.applyScores(scoreList, popName=envName)

        # report curEnv results to log
        scoreStats = trainer.getTaskScores(envName, popName=envName)
        bestTeam = trainer.getBestTeams(tasks=[envName], popName=envName)[0]
        with open(logName, 'a') as f:
            f.write(
                str(trainer.populations[envName].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' + envName + ',' +
                str(scoreStats['min']) + ',' + str(scoreStats['max']) + ',' +
                str(scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + '\n')
def ggpTrainAllAtOnce():

    # create TPG
    trainer = TpgTrainer(actions=range(18),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    envNamesSrt = sorted(list(allEnvNames))  # for reporting envs played

    fitnessEnvs = []

    while True:  # train indefinitely
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        if options.fitGamesNum < options.numGames and options.fitGamesNum > 0:
            fitnessEnvs.append(
                random.choice(list(set(allEnvNames) - set(fitnessEnvs))))
            if len(fitnessEnvs) > options.fitGamesNum:
                fitnessEnvs.pop(0)
        else:
            fitnessEnvs = list(allEnvNames)
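        # net effect: fitnessEnvs is a sliding window of at most fitGamesNum
        # games; each generation one not-yet-included game enters, and once
        # the window is full the oldest game is dropped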

        for envName in fitnessEnvs:
            print('Playing Game: ' + envName)

            scoreList = man.list()

            # run all agents on env
            pool.map(runAgent,
                     [(agent, envName, scoreList, options.trainEps,
                       options.trainFrames, None)
                      for agent in trainer.getAllAgents(skipTasks=[envName],
                                                        noRef=True)])

            trainer.applyScores(scoreList)

            # report curEnv results to log
            scoreStats = trainer.getTaskScores(envName)
            bestTeam = trainer.getBestTeams(tasks=[envName])[0]
            with open(logFileGameScoresName, 'a') as f:
                f.write(
                    str(trainer.populations[None].curGen) + ',' +
                    str((time.time() - tstart) / 3600) + ',' + envName + ',' +
                    str(scoreStats['min']) + ',' + str(scoreStats['max']) +
                    ',' + str(scoreStats['average']) + ',' +
                    str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                    str(bestTeam.uid) + '\n')

        # do evolution after all envs played
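        # multiEvolve weights sum to 1.0: half the fitness weight goes to the
        # min score across all fitness envs taken together, the other half is
        # split evenly across the individual envs (e.g. with 4 fitness envs:
        # [0.5, 0.125, 0.125, 0.125, 0.125])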
        trainer.multiEvolve(tasks=[fitnessEnvs] + [[en] for en in fitnessEnvs],
                            weights=[0.5] +
                            [0.5 / len(fitnessEnvs) for _ in fitnessEnvs],
                            fitMethod='min',
                            elitistTasks=allEnvNames)

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=fitnessEnvs)[0]
        with open(logFileFitnessName, 'a') as f:
            f.write(
                str(trainer.populations[None].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' +
                '/'.join(sorted(list(fitnessEnvs))) + ',' +
                str(trainer.populations[None].scoreStats['min']) + ',' +
                str(trainer.populations[None].scoreStats['max']) + ',' +
                str(trainer.populations[None].scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + ',' +
                str(len(trainer.populations[None].teams)) + ',' +
                str(len(trainer.populations[None].rootTeams)) + '\n')

        # save model after every gen
        with open(trainerFileName, 'wb') as f:
            pickle.dump(trainer, f)

        # every 10 generations save all agent outcomes
        if trainer.populations[None].curGen % 10 == 0:
            for rt in trainer.populations[None].rootTeams:
                with open(logFileOutcomesName, 'a') as f:
                    f.write(
                        str(trainer.populations[None].curGen) + ',' +
                        str((time.time() - tstart) / 3600) + ',' + str(rt.uid))
                    for envName in allEnvNames:
                        f.write(',' + str(rt.outcomes.get(envName, '-999999')))
                    f.write('\n')

        # every champEvalGen generations, evaluate the top agents on all games
        if trainer.populations[None].curGen % options.champEvalGen == 0:
            champEval(envNamesSrt,
                      trainer,
                      logFileChampionsName,
                      pool,
                      man,
                      tstart,
                      frames=options.testFrames,
                      eps=options.testEps)
Example #6
logFileName = 'sgp-log-' + datetime.datetime.now().strftime(
    "%Y-%m-%d-%H-%M") + '.txt'

# frame-limit curriculum: short episodes early in training, full length later
if curGen < 25:
    frames = 1000
elif curGen < 50:
    frames = 5000
else:
    frames = 18000

while True:  # run generations indefinitely
    curGen += 1
    scoreList = man.list()

    agents = trainer.getAllAgents(
        skipTasks=[envName + '-' + str(frames)],
        noRef=True)  # swap out agents only at start of generation

    # bump the frame limit at the generation milestones
    if curGen == 1:
        frames = 1000
    elif curGen == 25:
        frames = 5000
    elif curGen == 50:
        frames = 18000
    pool.map(runAgent,
             [(agent, envName, scoreList, 1, frames) for agent in agents])

    # apply scores
    trainer.applyScores(scoreList)

    tasks = [envName + '-' + str(frames)]
Example #7
processes = 7
# maxtasksperchild makes each worker exit and be replaced after a couple of
# tasks, which keeps memory from long-running emulator processes in check
pool = mp.Pool(processes=processes, maxtasksperchild=2)
man = mp.Manager()

allScores = []  # track all scores each generation

tStart = time.time()

logFileName = 'train-log-' + datetime.datetime.now().strftime(
    "%Y-%m-%d-%H-%M") + '.txt'

while True:  # run generations indefinitely
    scoreList = man.list()

    pool.map(runAgent, [(agent, scoreList, 0.05)
                        for agent in trainer.getAllAgents(noRef=True)])

    # apply scores
    trainer.applyScores(scoreList)
    trainer.evolve(fitShare=False, tasks=[])
    scoreStats = trainer.scoreStats
    allScores.append((trainer.curGen, scoreStats['min'], scoreStats['max'],
                      scoreStats['average']))

    # save model after every gen
    with open('saved-model-1.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # save best agent each generation
    #with open('best-agent.pkl','wb') as f:
    #    pickle.dump(trainer.getBestAgent(tasks=[]), f)