input_dims=[input_dims], lr=0.003) scores, avg_scores, eps_history = [], [], [] epochs = 500 for epoch in range(epochs): score = 0 done = False state_old = env.reset() # print(state_old[0].type) while not done: # iterating over every timestep (state) env.render() action = agent.choose_action(state_old) state_new, reward, done, info = env.step(action) score += reward agent.store_transition(state_old, action, reward, state_new, done) agent.learn() state_old = state_new scores.append(score) eps_history.append(agent.epsilon) avg_score = np.mean(scores[-100:]) avg_scores.append(avg_score) print("epoch: ", epoch, "score: %.2f " % score, "avg_score: %.2f " % avg_score, "epsilon: %.2f" % agent.epsilon) simple_plot(scores, avg_scores, epoch) env.close()
cum_reward = 0 while not done: # Select and perform an action action = agent.get_action(state, eps) next_state, reward, done, _ = env.step(action) cum_reward += reward # Task 1: TODO: Update the Q-values #agent.single_update(state,action,next_state,reward,done) # Task 2: TODO: Store transition and batch-update Q-values #agent.store_transition(state,action,next_state,reward,done) #agent.update_estimator() # Task 4: Update the DQN agent.store_transition(state, action, next_state, reward, done) agent.update_network() # Move to the next state state = next_state cumulative_rewards.append(cum_reward) print("ep : " + str(ep) + " rew : " + str(cum_reward)) #plot_rewards(cumulative_rewards) # Update the target network, copying all weights and biases in DQN # Uncomment for Task 4 if ep % TARGET_UPDATE == 0: agent.update_target_network() # Save the policy # Uncomment for Task 4 if ep % 1000 == 0:
eps_end=0.01, input_dims=[8], lr=0.001) scores, eps_history = [], [] n_games = 500 for i in range(n_games): score = 0 done = False observation = env.reset() while not done: action = agent.choose_action(observation) observation_, reward, done, info = env.step(action) score += reward agent.store_transition(observation, action, reward, observation_, done) agent.learn() observation = observation_ #env.render() scores.append(score) eps_history.append(agent.epsilon) avg_score = np.mean(scores[-100:]) print('episode ', i, 'score %.2f' % score, 'average score %.2f' % avg_score, 'epsilon %.2f' % agent.epsilon) x = [i + 1 for i in range(n_games)] filename = 'lunar_lander_dqn_2.png' plotLearning(x, scores, eps_history, filename)