def validation(self):
    """Run one validation episode, log reward/hits and save a replay video.

    The agent is put into test/validation mode (``is_test`` and ``is_valid``
    set to True) for the duration of the episode.  When ``VALID_INVINCIBLE``
    is enabled the episode also stops once the step counter exceeds
    ``VALID_MAX_T``, and the values returned by ``self.env.is_collision()``
    are accumulated in ``self.valid_hit``.

    Side effects:
        * appends to ``self.valid_hit_list`` and ``self.valid_reward`` and
          writes both with ``np.save`` under ``./checkpoints/``;
        * asks the ``ReplaySaver`` to write
          ``./checkpoints/checkpoint_<self.checkpoint_n>.mp4``;
        * on exit -- including when an exception propagates -- resets
          ``is_test`` / ``is_valid`` to False and ``invincible`` to the
          value selected by ``INVINCIBLE``.
    """
    self.is_valid = True
    self.is_test = True
    try:
        saver = ReplaySaver()
        state = self.env.reset(resize=RESIZE, size=RESIZE_SIZE)
        # Reorder axes to (2, 0, 1) and add a leading axis of size 1
        # (presumably HWC image -> 1xCxHxW batch; confirm against env).
        state = torch.from_numpy(state).permute(2, 0, 1).unsqueeze(0).type(
            torch.FloatTensor).to(self.device)
        done = False
        total_reward = 0

        # Equality (not truthiness) is kept on purpose so that a non-bool
        # config value selects exactly the same branches as before.
        self.invincible = bool(VALID_INVINCIBLE == True)  # noqa: E712
        step_capped = not (VALID_INVINCIBLE == False)  # noqa: E712
        self.valid_hit = 0
        valid_t = 0

        # Uncapped runs end only on `done`; capped runs also end once the
        # step counter has gone past VALID_MAX_T (short-circuit keeps
        # VALID_MAX_T untouched in the uncapped case).
        while not done and (not step_capped or valid_t <= VALID_MAX_T):
            valid_t += 1
            action = self.select_action(state)
            state, reward, done, _ = self.step(action)
            total_reward += reward
            print('\r Now rewards: {}'.format(total_reward), end='\r')
            saver.get_current_frame()
            if step_capped:
                # Hits are only counted in the capped (invincible) mode.
                self.valid_hit += self.env.is_collision()

        self.valid_hit_list.append(self.valid_hit)
        np.save('./checkpoints/valid_hits', np.array(self.valid_hit_list))
        self.valid_reward.append(total_reward)
        print('Valid reward: {} | Hit get: {}'.format(total_reward,
                                                      self.valid_hit))
        np.save('./checkpoints/valid_reward', np.array(self.valid_reward))

        saver.save_best()
        print('Making video...')
        saver.make_video('./checkpoints/checkpoint_{}.mp4'.format(
            self.checkpoint_n))
        print('Validation video made!!')
    finally:
        # Always leave validation mode, even if the rollout, the saves or
        # the video encoding raised; otherwise the caller would continue
        # with the agent still flagged as testing/validating.
        self.is_test = False
        self.is_valid = False
        self.invincible = bool(INVINCIBLE == True)  # noqa: E712
def test(self, episodes=10, saving_path='./test.mp4', size=(750, 750), fps=60):
    """Play ``episodes`` games with ``make_action(..., test=True)``.

    Frames are collected through a ``ReplaySaver``; whenever an episode's
    reward is not ``<=`` the best reward so far, ``saver.save_best()`` is
    called before the saver is reset.  After all episodes the mean and
    median reward are printed and ``saver.make_video`` is invoked.

    Args:
        episodes: number of games to play.
        saving_path: passed to ``saver.make_video`` as ``path``.
        size: passed to ``saver.make_video`` as ``size``.
        fps: passed to ``saver.make_video`` as ``fps``.

    NOTE(review): a second ``test`` definition appears later in this file;
    if both live in the same class the later one replaces this one.
    NOTE(review): ``is_resize`` / ``resize_size`` are not defined in this
    method (other methods use ``RESIZE`` / ``RESIZE_SIZE``) -- confirm they
    exist at module level.
    """
    saver = ReplaySaver()
    rewards = []
    best_reward = -np.inf

    for i in range(episodes):
        done = False
        state = self.env.reset(resize=is_resize, size=resize_size)
        saver.get_current_frame()
        episode_reward = 0.0

        # play one game
        while not done:
            # Device-agnostic cast: build a CPU float tensor first and let
            # `.to(self.device)` do the transfer.  Casting straight to
            # torch.cuda.FloatTensor fails on machines without CUDA.
            state = torch.from_numpy(state).permute(2, 0, 1).unsqueeze(
                0).type(torch.FloatTensor).to(self.device)
            action = self.make_action(state, test=True)
            state, reward, done, end = self.env.step(action,
                                                     resize=is_resize,
                                                     size=resize_size)
            saver.get_current_frame()
            episode_reward += reward
            print('\r episode: {} | Now reward: {} '.format(
                i + 1, episode_reward),
                  end='\r')

        # Keep stepping with the last action while the environment reports
        # `end`, recording each frame (presumably the closing animation --
        # TODO confirm; assumes `end` is only truthy once `done` is).
        while end:
            _, _, _, end = self.env.step(action)
            saver.get_current_frame()

        # `not (a <= b)` mirrors the original if/else exactly.
        if not (episode_reward <= best_reward):
            best_reward = episode_reward
            saver.save_best()
        saver.reset()
        rewards.append(episode_reward)

    print('\nRun %d episodes' % (episodes))
    print('Mean:', np.mean(rewards))
    print('Median:', np.median(rewards))
    print('Saving best reward video')
    saver.make_video(path=saving_path, size=size, fps=fps)
    print('Video saved :)')
def test_random(self,
                episodes=10,
                saving_path='./test_random.mp4',
                size=(750, 750),
                fps=30,
                invincible_mode=False,
                test_max_t=500):
    """Roll out a uniformly random policy and record the best episode.

    Each step draws an action with ``np.random.choice(5, 1)[0]`` and feeds
    it to ``self.env.step``.  Without invincibility an episode lasts until
    the environment reports ``done``; with invincibility it runs while the
    step counter is ``<= test_max_t`` regardless of ``done``.

    Args:
        episodes: number of episodes to play.
        saving_path: ``path`` argument for ``ReplaySaver.make_video``.
        size: ``size`` argument for ``ReplaySaver.make_video``.
        fps: ``fps`` argument for ``ReplaySaver.make_video``.
        invincible_mode: when equal to True, sets ``self.invincible``.
        test_max_t: step-counter bound used in invincible mode.

    Side effects: sets ``self.is_test`` and ``self.invincible``, prints
    progress/statistics, and saves the per-episode rewards with ``np.save``
    to ``./checkpoints/test_random_reward``.
    """
    recorder = ReplaySaver()
    all_rewards = []
    top_reward = -np.inf
    self.is_test = True
    self.invincible = bool(invincible_mode == True)  # noqa: E712

    def random_step(episode_no, running_total):
        """Take one random action; return (updated total, done flag)."""
        move = np.random.choice(5, 1)[0]
        _, gained, finished, _ = self.env.step(move,
                                               resize=RESIZE,
                                               size=RESIZE_SIZE,
                                               invincible=self.invincible)
        recorder.get_current_frame()
        running_total += gained
        print('\r episode: {} | Now reward: {} '.format(
            episode_no, running_total),
              end='\r')
        return running_total, finished

    for episode_idx in range(episodes):
        self.env.reset(resize=RESIZE, size=RESIZE_SIZE)
        recorder.get_current_frame()
        total = 0.0

        # play one game
        if self.invincible:
            # Fixed-length rollout: `done` is ignored in this mode.
            steps_taken = 0
            while steps_taken <= test_max_t:
                steps_taken += 1
                total, _ = random_step(episode_idx + 1, total)
        else:
            finished = False
            while not finished:
                total, finished = random_step(episode_idx + 1, total)

        # Keep the frames only when this episode is not `<=` the best.
        if not (total <= top_reward):
            top_reward = total
            recorder.save_best()
        recorder.reset()
        all_rewards.append(total)

    print('Run %d episodes' % (episodes))
    print('Mean:', np.mean(all_rewards))
    print('Median:', np.median(all_rewards))
    print('Saving best reward video')
    recorder.make_video(path=saving_path, size=size, fps=fps)
    np.save('./checkpoints/test_random_reward', all_rewards)
    print('Video saved :)')
def test(self,
         episodes=10,
         saving_path='./test.mp4',
         size=(750, 750),
         fps=30,
         invincible_mode=False,
         test_max_t=500):
    """Evaluate the agent with ``select_action`` and record the best episode.

    Without invincibility an episode lasts until ``self.step`` reports
    ``done``; with invincibility it runs while ``self.t <= test_max_t``
    regardless of ``done`` (``self.t`` is reset to 0 per episode and
    incremented before every step).

    Args:
        episodes: number of episodes to play.
        saving_path: ``path`` argument for ``ReplaySaver.make_video``.
        size: ``size`` argument for ``ReplaySaver.make_video``.
        fps: ``fps`` argument for ``ReplaySaver.make_video``.
        invincible_mode: when equal to True, sets ``self.invincible``.
        test_max_t: step-counter bound used in invincible mode.

    Side effects: sets ``self.is_test`` and ``self.invincible`` (neither is
    restored afterwards), overwrites ``self.t`` in invincible mode, prints
    progress/statistics, and saves the per-episode rewards with ``np.save``
    to ``./checkpoints/test_reward``.
    """
    saver = ReplaySaver()
    rewards = []
    best_reward = -np.inf
    self.is_test = True
    # Equality (not truthiness) is kept so non-bool arguments behave as
    # they did before.
    self.invincible = bool(invincible_mode == True)  # noqa: E712

    for i in range(episodes):
        done = False
        state = self.env.reset(resize=RESIZE, size=RESIZE_SIZE)
        # Device-agnostic cast: build a CPU float tensor and let
        # `.to(DEVICE)` do the transfer.  Casting straight to
        # torch.cuda.FloatTensor fails on machines without CUDA.
        state = torch.from_numpy(state).permute(2, 0, 1).unsqueeze(0).type(
            torch.FloatTensor).to(DEVICE)
        saver.get_current_frame()
        episode_reward = 0.0

        # play one game
        if not self.invincible:
            while not done:
                action = self.select_action(state)
                state, reward, done, _ = self.step(action)
                saver.get_current_frame()
                episode_reward += reward
                print('\r episode: {} | Now reward: {} '.format(
                    i + 1, episode_reward),
                      end='\r')
        else:
            # Fixed-length rollout; the counter lives on `self` as in the
            # original (other methods may read `self.t` -- not visible here).
            self.t = 0
            while self.t <= test_max_t:
                self.t += 1
                action = self.select_action(state)
                state, reward, done, _ = self.step(action)
                saver.get_current_frame()
                episode_reward += reward
                print('\r episode: {} | Now reward: {} '.format(
                    i + 1, episode_reward),
                      end='\r')

        # `not (a <= b)` mirrors the original if/else exactly.
        if not (episode_reward <= best_reward):
            best_reward = episode_reward
            saver.save_best()
        saver.reset()
        rewards.append(episode_reward)

    print('Run %d episodes' % (episodes))
    print('Mean:', np.mean(rewards))
    print('Median:', np.median(rewards))
    print('Saving best reward video')
    saver.make_video(path=saving_path, size=size, fps=fps)
    np.save('./checkpoints/test_reward', rewards)
    print('Video saved :)')