Ejemplo n.º 1
0
def OldStuff():
    """Train an ``Agent`` on freshly created ``TriadGameSession`` games.

    Eager execution is switched off through the TF1 compatibility API, an
    agent is built from a probe session's state length and maximum action
    count, and ``totalGames`` games are then played. After every step the
    transition is stored and ``agent.learn()`` is called. When a game ends,
    its score, the mean of the most recent (at most 100) scores and the
    agent's current epsilon are printed.

    Nothing is returned and the agent is not saved.
    """
    tf.compat.v1.disable_eager_execution()

    learningRate = 0.001
    totalGames = 10000

    # Probe session: only consulted for the state length and action count.
    probe = TriadGameSession()
    stateSize = [len(probe.getState())]
    actionCount = probe.getMaxActions()

    agent = Agent(
        gamma=0.99,
        lr=learningRate,
        epsilon=1.0,
        epsilonDec=0.0005,
        inputSize=stateSize,
        numActions=actionCount,
        memSize=1000000,
        batchSize=1024,
    )

    scores = []
    for gameIndex in range(totalGames):
        # Every game is played on its own new session.
        game = TriadGameSession()
        state = game.getState()
        total = 0

        # Keep stepping until the session flags the game as finished.
        while True:
            move = agent.chooseAction(state)
            nextState, reward, finished = game.step(move)
            total += reward
            agent.store(state, move, reward, nextState, finished)
            state = nextState
            agent.learn()
            if finished:
                break

        scores.append(total)
        # Running average over at most the 100 latest games.
        recentMean = np.mean(scores[-100:])
        print('game:', gameIndex,
              'score %.2f' % total,
              'avgScore %.2f' % recentMean,
              'epsilon %.2f' % agent.epsilon)

    # agent.save() stays commented out: the trained agent is not persisted.
    print('Finished!')
    # NOTE(review): this indented section reads num_games, brain, env and
    # eps_history, none of which are assigned in the visible lines. It looks
    # like a fragment of a different listing -- confirm its origin before use.
    score = 0

    # Training loop: one environment episode per iteration.
    for i in range(num_games):
        # Progress report, printed BEFORE the episode runs, so `score` is the
        # previous episode's total (0 on the first pass).
        if i % 10 == 0 and i > 0:
            # Every 10th episode also shows the mean of
            # scores[max(0, i - 10):(i + 1)] and the current epsilon.
            avg_score = np.mean(scores[max(0, i - 10):(i + 1)])
            print('episode: ', i, 'score: ', score,
                  ' average score %.3f' % avg_score,
                  'epsilon %.3f' % brain.EPSILON)
        else:
            print('episode: ', i, 'score: ', score)
        # Record the epsilon value in effect as this episode starts.
        eps_history.append(brain.EPSILON)
        done = False
        observation = env.reset()
        score = 0
        # Step until the environment reports done; every transition is stored
        # and learn() is called after each step.
        while not done:
            action = brain.chooseAction(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            brain.storeTransition(observation, action, reward, observation_,
                                  done)
            observation = observation_
            brain.learn()

        scores.append(score)

    # Ten further episodes with no storeTransition()/learn() calls.
    # NOTE(review): `observation` is never advanced to `observation_` in the
    # visible lines; the loop body appears to be cut off here -- confirm
    # against the full listing.
    for i in range(10):
        done = False
        observation = env.reset()
        while not done:
            action = brain.chooseAction(observation)
            observation_, reward, done, info = env.step(action)
Ejemplo n.º 3
0
# Training script. `session`, `lr` and `numGames` are used here without being
# assigned in this section, so they must already exist when it runs.

# The state of the existing session fixes the agent's input size.
observation = session.getState()
scores = []

agent = Agent(
    gamma=0.99,
    lr=lr,
    epsilon=1.0,
    epsilonDec=0.0005,
    inputSize=[len(observation)],
    numActions=session.getMaxActions(),
    memSize=1000000,
    batchSize=64,
)

for i in range(numGames):
    # Each game runs on its own freshly created session.
    session = TriadGameSession()
    observation = session.getState()
    score = 0

    # Step until the session reports the game as finished; every transition
    # is stored and learn() is called after each step.
    while True:
        action = agent.chooseAction(observation)
        observationNext, reward, done = session.step(action)
        score += reward
        agent.store(observation, action, reward, observationNext, done)
        observation = observationNext
        agent.learn()
        if done:
            break

    scores.append(score)
    # Running average over at most the 100 most recent games.
    avgScore = np.mean(scores[-100:])
    print('game:', i, 'score %.2f' % score, 'avgScore %.2f' % avgScore,
          'epsilon %.2f' % agent.epsilon)

agent.save()
print('Finished!')