예제 #1
def main():
    """Train a TPG population, replay the best agent, and plot the progress.

    For each of ``GENERATIONS`` generations every agent handed out by the
    trainer is evaluated ``EVALUATIONS`` times with ``evaluateAgent``; the
    summed result is the agent's score for that generation.  The per-generation
    minimum, maximum and average scores are printed and recorded.  Afterwards
    the best agent (as reported by ``getBestAgent``) is replayed ``FINAL_RUNS``
    times with graphics enabled, and the recorded statistics are plotted.
    """
    # Initiate the trainer; the actions are the predictions of the agent:
    # action 1 -> right box will fall off the edge
    # action 0 -> right box will not fall off
    trainer = TpgTrainer(actions=[0, 1], teamPopSize=50)

    # One entry per generation; all three must end up GENERATIONS long so
    # they can be plotted against the generation axis below.
    _min, _max, _avg = [], [], []

    for gen in range(GENERATIONS):  # generation loop
        print("Generation: ", gen + 1, "/", GENERATIONS)
        curScores = []  # scores of every agent in this generation

        while True:  # loop to go through agents
            agent = trainer.getNextAgent()
            if agent is None:
                break  # no more agents, so proceed to next gen

            # Evaluate the agent: accumulate its result over all evaluations.
            score = 0
            for _ in range(EVALUATIONS):
                score += evaluateAgent(agent)

            # NOTE(review): reward() is the PyTPG call that hands the score
            # back to the agent so the trainer can rank it — confirm against
            # the TPG version used by this project.
            agent.reward(score)
            # Record the score; without this the statistics below would be
            # computed over an empty list and min()/max() would raise.
            curScores.append(score)

        gen_min = min(curScores)
        gen_max = max(curScores)
        gen_avg = sum(curScores) / len(curScores)

        print("Min:", gen_min, "   Max:", gen_max, "   Avg:",
              gen_avg,
              "(out of " + str(EVALUATIONS) + ")\n")

        _min.append(gen_min)
        _max.append(gen_max)
        _avg.append(gen_avg)

        # NOTE(review): evolve() is the PyTPG call that builds the next
        # generation from the rewarded agents; without it getNextAgent() has
        # nothing new to hand out — confirm against the TPG version in use.
        trainer.evolve()

    # Get the best agent after all the generations.
    best_agent, best_score = getBestAgent(trainer)

    print("Best agent's score:", best_score, "/", EVALUATIONS)

    for run in range(FINAL_RUNS):
        # end='\r' keeps the progress message on a single console line.
        print("Final run: ", run + 1, "/", FINAL_RUNS, end='\r')
        evaluateAgent(best_agent, graphics=True)

    # Plot progress over the generations (x axis is 1-based).
    generations = range(1, GENERATIONS + 1)

    axes = plt.gca()
    axes.set_ylim([0, EVALUATIONS])

    plt.plot(generations, _min, label="min")
    plt.plot(generations, _max, label="max")
    plt.plot(generations, _avg, label="avg")

    # The labels above are only visible once a legend is drawn, and the
    # figure is only displayed once show() is called.
    plt.legend()
    plt.show()

예제 #2
# Generation loop of a multi-game training run: each generation picks one
# game from gameQueue, then walks through every agent the trainer hands out.
# NOTE(review): this snippet is cut off after the agent.act() call, so the
# rest of the episode loop and the end-of-generation handling are not shown.
for gen in range(100):  # generation loop (fixed at 100 generations)
    curScores = []  # new list per gen

    # Select the environment for this generation from the game queue.
    game = gameQueue.pop()  # take out last game
    print('playing on', game)
    env = envs[game]  # look up the environment registered under this game
    # Refill the queue from the full game list once it has been emptied.
    if len(gameQueue) == 0:
        gameQueue = list(allGames)

    while True:  # loop to go through agents
        # Read the remaining-agent count before fetching the next agent.
        teamNum = trainer.remainingAgents()
        agent = trainer.getNextAgent()
        if agent is None:
            break  # no more agents, so proceed to next gen

        # check if agent already has score
        if agent.taskDone():
            # NOTE(review): the outcome fetched here is overwritten by
            # `score = 0` two lines below, and the episode setup only runs
            # when taskDone() is true. This looks like a lost `else:` branch
            # (reuse the stored outcome, otherwise play an episode) — confirm
            # against the upstream source before relying on this logic.
            score = agent.getOutcome()
            state = env.reset()  # get initial state and prep environment
            score = 0
            # Every action index of the environment is offered to the agent.
            valActs = range(env.action_space.n)
            for i in range(1000):  # at most 1000 steps per episode

                act = agent.act(getState(state),
                                valActs=valActs)  # get action from agent