agent = Agent(batch_size=4, n_actions=n_actions, eps_end=0.01,
              input_dims=[input_dims], lr=0.003)
scores, avg_scores, eps_history = [], [], []
epochs = 500

for epoch in range(epochs):
    score = 0
    done = False
    state_old = env.reset()
    # print(state_old[0].type)
    while not done:  # iterating over every timestep (state)
        env.render()
        action = agent.choose_action(state_old)
        state_new, reward, done, info = env.step(action)
        score += reward
        agent.store_transition(state_old, action, reward, state_new, done)
        agent.learn()
        state_old = state_new
    scores.append(score)
    eps_history.append(agent.epsilon)

    avg_score = np.mean(scores[-100:])  # rolling average over the last 100 epochs
    avg_scores.append(avg_score)

    print("epoch: ", epoch, "score: %.2f " % score,
          "avg_score: %.2f " % avg_score,
          "epsilon: %.2f" % agent.epsilon)
    simple_plot(scores, avg_scores, epoch)
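The loop calls a simple_plot helper after every epoch, but its definition does not appear in this excerpt. A minimal sketch of what such a live-plotting helper could look like, assuming matplotlib and inferring the signature from the (scores, avg_scores, epoch) call site above:

import matplotlib.pyplot as plt

def simple_plot(scores, avg_scores, epoch):
    # Hypothetical stand-in for the helper used above, which is not
    # shown in this excerpt. Redraws the per-epoch score and the
    # 100-epoch moving average on a single live figure.
    plt.clf()
    plt.plot(scores, label="score")
    plt.plot(avg_scores, label="avg score (last 100)")
    plt.xlabel("epoch")
    plt.ylabel("score")
    plt.title("Training progress through epoch %d" % epoch)
    plt.legend()
    plt.pause(0.001)  # let the GUI event loop redraw the figure

The plt.pause call is what lets the figure refresh while training is still running; a plain plt.show would block the loop.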
agent = Agent(epsilon=1.0, batch_size=64, n_actions=4,
              eps_end=0.01, input_dims=[8], lr=0.001)
scores, eps_history = [], []
n_games = 500

for i in range(n_games):  # one game == one full episode
    score = 0
    done = False
    observation = env.reset()
    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward, observation_, done)
        agent.learn()
        observation = observation_
        # env.render()
    scores.append(score)
    eps_history.append(agent.epsilon)

    avg_score = np.mean(scores[-100:])  # rolling average over the last 100 episodes

    print('episode ', i, 'score %.2f' % score,
          'average score %.2f' % avg_score,
          'epsilon %.2f' % agent.epsilon)
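Once the loop finishes, scores and eps_history hold one entry per episode, so the learning curve can be reconstructed after the fact. A hedged sketch, assuming matplotlib; the twin-axis layout and the output filename are illustrative choices, not part of the original script:

import matplotlib.pyplot as plt
import numpy as np

# Hypothetical post-training plot; runs after the loop above.
x = range(1, n_games + 1)
# Average of the most recent 100 scores, matching the loop's scores[-100:].
running_avg = [np.mean(scores[max(0, i - 99):i + 1]) for i in range(n_games)]

fig, ax1 = plt.subplots()
ax1.plot(x, running_avg, color="C0")
ax1.set_xlabel("episode")
ax1.set_ylabel("100-episode average score", color="C0")

ax2 = ax1.twinx()  # second y-axis so epsilon decay shares the same x-axis
ax2.plot(x, eps_history, color="C1")
ax2.set_ylabel("epsilon", color="C1")

fig.savefig("lunar_lander_learning_curve.png")  # illustrative filename

Plotting the score and epsilon together makes it easy to see whether performance keeps improving after exploration has decayed to eps_end.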