# Earlier training loop, kept as OldStuff(); the current loop below uses a smaller batch size.
def OldStuff():
    tf.compat.v1.disable_eager_execution()
    lr = 0.001
    numGames = 10000
    session = TriadGameSession()
    observation = session.getState()
    scores = []

    agent = Agent(gamma=0.99, lr=lr, epsilon=1.0, epsilonDec=0.0005,
                  inputSize=[len(observation)],
                  numActions=session.getMaxActions(),
                  memSize=1000000, batchSize=1024)

    for i in range(numGames):
        done = False
        score = 0
        session = TriadGameSession()
        observation = session.getState()

        # Play one full game, storing each transition and learning after every step.
        while not done:
            action = agent.chooseAction(observation)
            observationNext, reward, done = session.step(action)
            score += reward
            agent.store(observation, action, reward, observationNext, done)
            observation = observationNext
            agent.learn()

        scores.append(score)
        avgScore = np.mean(scores[-100:])  # running average over the last 100 games
        print('game:', i,
              'score %.2f' % score,
              'avgScore %.2f' % avgScore,
              'epsilon %.2f' % agent.epsilon)

    #agent.save()
    print('Finished!')
score = 0
for i in range(num_games):
    if i % 10 == 0 and i > 0:
        # Log a moving average over the last 10 episodes.
        # 'score' here is still the previous episode's score, since logging
        # happens at the top of the loop before the episode is played.
        avg_score = np.mean(scores[max(0, i - 10):(i + 1)])
        print('episode: ', i, 'score: ', score,
              ' average score %.3f' % avg_score,
              'epsilon %.3f' % brain.EPSILON)
    else:
        print('episode: ', i, 'score: ', score)
    eps_history.append(brain.EPSILON)

    done = False
    observation = env.reset()
    score = 0
    while not done:
        action = brain.chooseAction(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        brain.storeTransition(observation, action, reward, observation_, done)
        observation = observation_
        brain.learn()
    scores.append(score)

# Play a few episodes with the trained agent.
for i in range(10):
    done = False
    observation = env.reset()
    while not done:
        action = brain.chooseAction(observation)
        observation_, reward, done, info = env.step(action)
        observation = observation_  # advance to the next state so the policy sees fresh observations
observation = session.getState()
scores = []

agent = Agent(gamma=0.99, lr=lr, epsilon=1.0, epsilonDec=0.0005,
              inputSize=[len(observation)],
              numActions=session.getMaxActions(),
              memSize=1000000, batchSize=64)

for i in range(numGames):
    done = False
    score = 0
    session = TriadGameSession()
    observation = session.getState()

    # Play one full game, storing each transition and learning after every step.
    while not done:
        action = agent.chooseAction(observation)
        observationNext, reward, done = session.step(action)
        score += reward
        agent.store(observation, action, reward, observationNext, done)
        observation = observationNext
        agent.learn()

    scores.append(score)
    avgScore = np.mean(scores[-100:])  # running average over the last 100 games
    print('game:', i,
          'score %.2f' % score,
          'avgScore %.2f' % avgScore,
          'epsilon %.2f' % agent.epsilon)

agent.save()
print('Finished!')