def evaluate(model_path, history_num, max_episode_steps, episode_num, result_save_path):
    """Evaluate a trained Q-network on MountainCar-v0 and pickle per-episode results.

    Loads a checkpoint (a dict with 'model_hyper' holding the QNetwork constructor
    args and 'model' holding the state dict), runs `episode_num` rendered episodes
    of at most `max_episode_steps` steps each, and writes the tuple
    (test_success_history, test_reward_history) to `result_save_path` with pickle.

    Args:
        model_path: path to the torch checkpoint file.
        history_num: size of the observation-history window kept by State.
        max_episode_steps: per-episode step cap.
        episode_num: number of evaluation episodes to run.
        result_save_path: destination file for the pickled result tuple.
    """
    checkpoint = torch.load(model_path)
    qnetwork = QNetwork(*checkpoint['model_hyper'])
    qnetwork.load_state_dict(checkpoint['model'])

    env = gym.make('MountainCar-v0')
    test_success_history = []
    test_reward_history = []
    try:
        for episode in range(episode_num):
            print('episode %d' % (episode))
            observation = env.reset()
            # initialize state from the first observation
            state = State(history_num)
            state.init_state(observation)
            done = False
            reward_sum = 0
            for t in range(max_episode_steps):
                env.render()
                state.display()
                # select the action with max q value; no gradients needed at eval time
                with torch.no_grad():
                    action = qnetwork.decide_action(state.toTensor().view(1, -1))
                action = action.sum().item()
                observation, reward, done, info = env.step(action)
                reward_sum = reward_sum + reward
                if done:
                    print('done')
                    print(reward_sum)
                    # MountainCar success criterion: cart position reached the flag at x >= 0.5
                    success = False
                    if observation[0] >= 0.5:
                        success = True
                    test_success_history.append(success)
                    test_reward_history.append(reward_sum)
                    break
                state.update_state_by_observation(observation, action)
            else:
                # BUG FIX: an episode that exhausts max_episode_steps without `done`
                # was previously never recorded, so the saved histories could end up
                # shorter than episode_num. Record it as an unsuccessful episode.
                test_success_history.append(False)
                test_reward_history.append(reward_sum)
            print('- ' * 100)
    finally:
        # BUG FIX: release the render window / env resources even if an episode errors
        env.close()
    print('save to %s' % (result_save_path))
    with open(result_save_path, 'wb') as f:
        pkl.dump((test_success_history, test_reward_history), f)
done = False final_transition = None loss_sum = 0 reward_sum = 0 for t in range(max_episode_steps): if done: break env.render() state.display() # select a action p = random.random() if p < epsilon: action = env.action_space.sample() else: action = qnetwork.decide_action(state.toTensor().view(1, -1)) # to scalar and then to int assert list(action.shape) == [1] action = action.sum().item() observation, reward, done, info = env.step(action) if done: # -1 reward for all step avg_loss = loss_sum / max_episode_steps print('done') print(avg_loss) loss_history.append(avg_loss) reward_sum_history.append(reward_sum) old_state = copy.deepcopy(state)