# Evaluate a pre-trained DQN agent on LunarLander-v2 with on-screen rendering.
# Loads a saved state_dict from disk, runs 13 greedy-ish episodes
# (epsilon fixed at its floor 0.01) and prints per-episode statistics.
model = T.load('model/model.pt', map_location=T.device('cpu'))
env = gym.make('LunarLander-v2')
# env.monitor.start('cartpole-experiment-1', force=True)
agent = Agent(gamma=0.99, epsilon=0.01, batch_size=64, n_actions=4,
              eps_end=0.01, input_dims=[8], lr=0.001)
agent.Q_eval.load_state_dict(model)

for episode in range(13):
    obs = env.reset()
    done = False
    steps = 0
    total_reward = 0
    while not done:
        # Small delay so the rendered window plays back at a watchable speed.
        time.sleep(.005)
        chosen = agent.choose_action(obs)
        next_obs, reward, done, info = env.step(chosen)
        obs = next_obs
        env.render()
        steps += 1
        total_reward += reward
    # "sub" = reward net of step count (one reward unit per step deducted).
    print('episode:', episode, '; steps:', steps, '; tot reward:', total_reward,
          "; sub:", total_reward - steps)
env.close()
batch_size=64, eps_end=0.01, fc1_dims=128, fc2_dims=128, replace=100, n_actions=3) scores, eps_history = [], [] for i in range(n_games): done = False score = 0 game.reset() while not done: state = game.get_state() action = agent.choose_action(state) reward, done, score = game.play_step(action) state_ = game.get_state() score += reward agent.store_transition(state, action, reward, state_, done) agent.learn() eps_history.append(agent.epsilon) scores.append(score) avg_score = np.mean(scores[-100:]) print('episode ', i, 'score %.1f' % score, 'average score %.1f' % avg_score, 'epsilon %.2f' % agent.epsilon) filename = 'snakeAI_Results.png' x = [i + 1 for i in range(n_games)]
# Training loop for an agent with image observations: each frame is
# preprocessed and reshaped to a (1, 1, 32, 32) tensor before being fed to
# the network and stored in the replay buffer.
agent.load_models()
np.random.seed(0)
score_history = []
for i in range(750):
    done = False
    score = 0
    obs = env.reset()
    # NOTE(review): state_preproces presumably grayscales/downsamples the
    # raw frame to 32x32 — confirm against its definition elsewhere.
    obs = state_preproces(obs)
    obs = np.reshape(obs, (1,1,32,32))
    # Hard cap of 1000 steps per episode; break out early once done.
    for j in range(1000):
        if done:
            break
        act = agent.choose_action(obs)
        # env.step_discrete is a project-specific wrapper (not gym's step) —
        # presumably maps a discrete action index to the env's action space.
        new_state, reward, done, info = env.step_discrete(act)
        new_state = state_preproces(new_state)
        new_state = np.reshape(new_state, (1,1,32,32))
        # done is stored as int (0/1) for use as a terminal mask in learning.
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score+= reward
        obs = new_state
    score_history.append(score)
    # j is the last step index reached (loop variable leaks out of the for).
    print('episode ', i, ' score %.2f' % score," steps: ",j,
          '100 game average %.2f' %np.mean(score_history[-100:]))
    # Accumulates a plot of the score history each episode (figure is
    # presumably saved/shown inside the periodic branch below).
    plt.plot(score_history)
    plt.xlabel("episode")
    plt.ylabel("score")
    # Every 25th episode — body of this branch is outside the visible chunk
    # (likely a checkpoint save / figure save; confirm in the full file).
    if i % 25 ==0:
        # Warm-up: fill the replay buffer with random-policy transitions.
        # NOTE(review): this fragment starts mid-loop — the enclosing
        # `if load_checkpoint:` / `while` header is outside the visible chunk,
        # so the exact nesting below is reconstructed and should be confirmed.
        action = env.action_space.sample()
        observation_, reward, done, info = env.step(action)
        agent.remember(observation, action, reward, observation_, done)
        n_steps += 1
    # One learn() call so the networks are built before loading weights.
    agent.learn()
    agent.load_models()
    evaluate = True
else:
    # Training from scratch: act with exploration noise enabled.
    evaluate = False
for i in range(n_games):
    observation = env.reset()
    done = False
    score = 0
    while not done:
        # `evaluate` toggles deterministic action selection in the agent.
        action = agent.choose_action(observation, evaluate)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.remember(observation, action, reward, observation_, done)
        # Skip gradient updates when only evaluating a loaded checkpoint.
        if not load_checkpoint:
            agent.learn()
        observation = observation_
    score_history.append(score)
    # Rolling 100-episode average used to track the best score so far.
    avg_score = np.mean(score_history[-100:])
    if avg_score > best_score:
        best_score = avg_score
    print('episode ', i, 'score %.1f' % score, 'avg score %.1f' % avg_score)