Example #1
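This example shows two TPG training regimes for the Atari suite: the first fragment is the per-generation inner loop of a variant that keeps a separate population per game, while ggpTrainAllAtOnce trains one population across all games at once. Both listings are excerpts: trainer, pool, man, options, allEnvNames, runAgent, tstart, and the log-file names, along with the time, pickle, and random imports, are assumed to be set up earlier in the script.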
    print('TPG Gen: ' + str(trainer.populations[next(iter(allEnvNames))].curGen))  # any game's key works; the per-game populations advance together
    for envName in allEnvNames:  # train on each env
        print('Playing Game: ' + envName)

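        # manager-backed list so scores written in the worker processes are visible here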
        scoreList = man.list()

        # run all agents on env
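        # each arg tuple is (agent, env, shared score list, episodes, max frames, final arg
        # that is None in both calls), inferred from the options.trainEps/options.trainFrames
        # usage in ggpTrainAllAtOnce below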
        pool.map(runAgent,
                 [(agent, envName, scoreList, 3, 18000, None)
                  for agent in trainer.getAllAgents(
                      skipTasks=[envName], noRef=True, popName=envName)])

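        # fold the collected scores back into this game's population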
        trainer.applyScores(scoreList, popName=envName)

        # report curEnv results to log
        scoreStats = trainer.getTaskScores(envName, popName=envName)
        bestTeam = trainer.getBestTeams(tasks=[envName], popName=envName)[0]
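        # CSV row: gen, hours elapsed, game, min/max/mean score, root-team graph size, champion uid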
        with open(logName, 'a') as f:
            f.write(
                str(trainer.populations[envName].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' + envName + ',' +
                str(scoreStats['min']) + ',' + str(scoreStats['max']) + ',' +
                str(scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + '\n')

        # do evolution on each env played
        trainer.evolve(fitMethod='single',
                       tasks=[envName],
                       elitistTasks=[envName],
                       popName=envName)
def ggpTrainAllAtOnce():
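    """Train one TPG population on all games at once. Each generation's fitness
    comes from a sliding window of games (options.fitGamesNum wide), or from
    every game when no window size is set."""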

    # create TPG
    trainer = TpgTrainer(actions=range(18),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    envNamesSrt = sorted(list(allEnvNames))  # for reporting envs played

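    # games currently contributing to fitness (a sliding window when fitGamesNum is set)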
    fitnessEnvs = []

    while True:  # train indefinitely
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

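        # maintain the window: add one random not-yet-included game per generation
        # and drop the oldest once the window holds fitGamesNum games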
        if options.fitGamesNum < options.numGames and options.fitGamesNum > 0:
            fitnessEnvs.append(
                random.choice(list(set(allEnvNames) - set(fitnessEnvs))))
            if len(fitnessEnvs) > options.fitGamesNum:
                fitnessEnvs.pop(0)
        else:
            fitnessEnvs = list(allEnvNames)

        for envName in fitnessEnvs:
            print('Playing Game: ' + envName)

            scoreList = man.list()

            # run all agents on env
            pool.map(runAgent,
                     [(agent, envName, scoreList, options.trainEps,
                       options.trainFrames, None)
                      for agent in trainer.getAllAgents(skipTasks=[envName],
                                                        noRef=True)])

            trainer.applyScores(scoreList)

            # report curEnv results to log
            scoreStats = trainer.getTaskScores(envName)
            bestTeam = trainer.getBestTeams(tasks=[envName])[0]
            with open(logFileGameScoresName, 'a') as f:
                f.write(
                    str(trainer.populations[None].curGen) + ',' +
                    str((time.time() - tstart) / 3600) + ',' + envName + ',' +
                    str(scoreStats['min']) + ',' + str(scoreStats['max']) +
                    ',' + str(scoreStats['average']) + ',' +
                    str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                    str(bestTeam.uid) + '\n')

        # do evolution after all envs played
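        # fitness: half the weight on the joint task over the whole window (scored by
        # the minimum across its games, per fitMethod='min'), half split evenly across
        # the individual games; elitism keeps the best team on every game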
        trainer.multiEvolve(tasks=[fitnessEnvs] + [[en] for en in fitnessEnvs],
                            weights=[0.5] +
                            [0.5 / len(fitnessEnvs) for _ in fitnessEnvs],
                            fitMethod='min',
                            elitistTasks=allEnvNames)

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=fitnessEnvs)[0]
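        # CSV row: gen, hours elapsed, window games, population score min/max/mean,
        # champion graph size and uid, team and root-team counts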
        with open(logFileFitnessName, 'a') as f:
            f.write(
                str(trainer.populations[None].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' +
                '/'.join(sorted(list(fitnessEnvs))) + ',' +
                str(trainer.populations[None].scoreStats['min']) + ',' +
                str(trainer.populations[None].scoreStats['max']) + ',' +
                str(trainer.populations[None].scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + ',' +
                str(len(trainer.populations[None].teams)) + ',' +
                str(len(trainer.populations[None].rootTeams)) + '\n')

        # save model after every gen
        with open(trainerFileName, 'wb') as f:
            pickle.dump(trainer, f)

        # every 10 generations save all agent outcomes
        if trainer.populations[None].curGen % 10 == 0:
            for rt in trainer.populations[None].rootTeams:
                with open(logFileOutcomesName, 'a') as f:
                    f.write(
                        str(trainer.populations[None].curGen) + ',' +
                        str((time.time() - tstart) / 3600) + ',' + str(rt.uid))
                    for envName in allEnvNames:
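                        # -999999 is a placeholder for games this team was never scored on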
                        f.write(',' + str(rt.outcomes.get(envName, '-999999')))
                    f.write('\n')

        # every champEvalGen generations evaluate top agents on all games
        if trainer.populations[None].curGen % options.champEvalGen == 0:
            champEval(envNamesSrt,
                      trainer,
                      logFileChampionsName,
                      pool,
                      man,
                      tstart,
                      frames=options.testFrames,
                      eps=options.testEps)