예제 #1
0
    def validation(self):
        """Play one validation episode, log its reward and save a replay video.

        The agent is switched into validation mode (``is_valid`` and
        ``is_test`` set to True) for the duration of the run. When
        ``VALID_INVINCIBLE`` is set, the episode additionally stops once the
        step counter exceeds ``VALID_MAX_T`` and the values returned by
        ``self.env.is_collision()`` are accumulated into ``self.valid_hit``;
        otherwise the episode runs until the step reports ``done``.

        Side effects: appends the episode reward to ``self.valid_reward``
        (and the hit count to ``self.valid_hit_list`` in invincible mode),
        saves both histories with ``np.save`` under ``./checkpoints/`` and
        asks the replay saver to write ``checkpoint_<checkpoint_n>.mp4``.

        The mode flags are restored in a ``finally`` clause so that a failure
        during validation cannot leave the agent stuck in validation mode.
        """
        self.is_valid = True
        self.is_test = True
        try:
            saver = ReplaySaver()
            state = self.env.reset(resize=RESIZE, size=RESIZE_SIZE)
            # Move the last axis to the front and add a leading axis
            # (presumably HWC frame -> 1xCxHxW batch; confirm against env).
            state = torch.from_numpy(state).permute(2, 0, 1).unsqueeze(0).type(
                torch.FloatTensor).to(self.device)
            done = False
            total_reward = 0

            # VALID_INVINCIBLE is treated as a boolean configuration flag.
            capped = bool(VALID_INVINCIBLE)
            self.invincible = capped

            self.valid_hit = 0
            steps = 0
            # Without invincibility only `done` ends the episode; with it the
            # step counter bounds the run as well (VALID_MAX_T is only read
            # in that case thanks to short-circuiting).
            while not done and (not capped or steps <= VALID_MAX_T):
                steps += 1
                action = self.select_action(state)
                state, reward, done, _ = self.step(action)
                total_reward += reward
                print('\r Now rewards: {}'.format(total_reward), end='\r')
                saver.get_current_frame()
                if capped:
                    self.valid_hit += self.env.is_collision()

            if capped:
                self.valid_hit_list.append(self.valid_hit)
                np.save('./checkpoints/valid_hits',
                        np.array(self.valid_hit_list))

            self.valid_reward.append(total_reward)
            print('Valid reward: {} | Hit get: {}'.format(total_reward,
                                                          self.valid_hit))
            np.save('./checkpoints/valid_reward', np.array(self.valid_reward))
            saver.save_best()
            print('Making video...')
            saver.make_video('./checkpoints/checkpoint_{}.mp4'.format(
                self.checkpoint_n))
            print('Validation video made!!')
        finally:
            # Always hand the agent back in its regular (non-validation)
            # configuration, even when the episode above raised.
            self.is_test = False
            self.is_valid = False
            self.invincible = bool(INVINCIBLE)
예제 #2
0
    def test(self,
             episodes=10,
             saving_path='./test.mp4',
             size=(750, 750),
             fps=60):
        """Play ``episodes`` test games and save a video of the best one.

        Args:
            episodes: Number of games to play.
            saving_path: Path passed to ``saver.make_video`` for the video.
            size: Frame size passed to ``saver.make_video``.
            fps: Frame rate passed to ``saver.make_video``.

        For every game the frames are collected through
        ``saver.get_current_frame()``. Whenever a game's total reward beats
        the best one seen so far, ``saver.save_best()`` is called before the
        saver is reset. The mean and median episode rewards are printed at
        the end.
        """
        saver = ReplaySaver()
        rewards = []
        best_reward = -np.inf

        for i in range(episodes):
            done = False
            state = self.env.reset(resize=is_resize, size=resize_size)

            saver.get_current_frame()
            episode_reward = 0.0

            # play one game
            while not done:
                # `.float()` instead of `.type(torch.cuda.FloatTensor)`: the
                # CUDA tensor type raises on machines without a GPU, while
                # this yields the same float32 tensor on `self.device`.
                state = torch.from_numpy(state).permute(
                    2, 0, 1).unsqueeze(0).float().to(self.device)
                action = self.make_action(state, test=True)
                state, reward, done, end = self.env.step(action,
                                                         resize=is_resize,
                                                         size=resize_size)

                saver.get_current_frame()
                episode_reward += reward
                print('\r episode: {} | Now reward: {} '.format(
                    i + 1, episode_reward),
                      end='\r')

            # Keep stepping with the last action while the environment still
            # reports `end`, recording those frames too (presumably the
            # closing frames of the game; confirm against the environment).
            while end:
                _, _, _, end = self.env.step(action)
                saver.get_current_frame()

            # Only a strictly better episode replaces the stored best replay;
            # the frame buffer is cleared either way.
            if episode_reward > best_reward:
                best_reward = episode_reward
                saver.save_best()
            saver.reset()

            rewards.append(episode_reward)
        print('\nRun %d episodes' % (episodes))
        print('Mean:', np.mean(rewards))
        print('Median:', np.median(rewards))
        print('Saving best reward video')
        saver.make_video(path=saving_path, size=size, fps=fps)
        print('Video saved :)')
예제 #3
0
    def test_random(self,
                    episodes=10,
                    saving_path='./test_random.mp4',
                    size=(750, 750),
                    fps=30,
                    invincible_mode=False,
                    test_max_t=500):
        """Play ``episodes`` games with random actions as a baseline.

        Args:
            episodes: Number of games to play.
            saving_path: Path passed to ``make_video`` for the best replay.
            size: Frame size passed to ``make_video``.
            fps: Frame rate passed to ``make_video``.
            invincible_mode: When equal to True, ``self.invincible`` is set
                and every game runs for as long as its step counter is
                ``<= test_max_t``, regardless of ``done``.
            test_max_t: Step bound used in invincible mode only.

        Each action is drawn with ``np.random.choice(5, 1)[0]``. The method
        sets ``self.is_test`` to True and does not reset it. It prints the
        mean and median rewards, builds the video of the best game and
        stores all rewards via ``np.save('./checkpoints/test_random_reward')``.
        """
        recorder = ReplaySaver()
        episode_rewards = []
        top_reward = -np.inf
        self.is_test = True

        # Only values comparing equal to True switch invincibility on.
        self.invincible = bool(invincible_mode == True)

        for episode_idx in range(episodes):
            done = False
            state = self.env.reset(resize=RESIZE, size=RESIZE_SIZE)
            recorder.get_current_frame()
            episode_reward = 0.0

            # Decide once per game which stop rule applies.
            time_limited = not (self.invincible == False)
            step_count = 0

            # play one game
            while True:
                if time_limited:
                    keep_going = step_count <= test_max_t
                else:
                    keep_going = not done
                if not keep_going:
                    break
                step_count += 1

                action = np.random.choice(5, 1)[0]
                state, reward, done, end = self.env.step(
                    action,
                    resize=RESIZE,
                    size=RESIZE_SIZE,
                    invincible=self.invincible)

                recorder.get_current_frame()
                episode_reward += reward
                progress = '\r episode: {} | Now reward: {} '.format(
                    episode_idx + 1, episode_reward)
                print(progress, end='\r')

            # A game that is not "less than or equal" to the best so far
            # becomes the new best; the recorder is cleared in both cases.
            if not (episode_reward <= top_reward):
                top_reward = episode_reward
                recorder.save_best()
            recorder.reset()

            episode_rewards.append(episode_reward)

        print('Run %d episodes' % (episodes))
        print('Mean:', np.mean(episode_rewards))
        print('Median:', np.median(episode_rewards))
        print('Saving best reward video')
        recorder.make_video(path=saving_path, size=size, fps=fps)
        np.save('./checkpoints/test_random_reward', episode_rewards)
        print('Video saved :)')
예제 #4
0
    def test(self,
             episodes=10,
             saving_path='./test.mp4',
             size=(750, 750),
             fps=30,
             invincible_mode=False,
             test_max_t=500):
        """Play ``episodes`` test games with the agent's policy.

        Args:
            episodes: Number of games to play.
            saving_path: Path passed to ``saver.make_video`` for the video.
            size: Frame size passed to ``saver.make_video``.
            fps: Frame rate passed to ``saver.make_video``.
            invincible_mode: When truthy, ``self.invincible`` is enabled and
                every game runs while ``self.t <= test_max_t`` instead of
                stopping on ``done``.
            test_max_t: Step bound used in invincible mode only.

        The method sets ``self.is_test`` to True and does not reset it. It
        prints the mean and median rewards, builds the video of the best
        game and stores all rewards via
        ``np.save('./checkpoints/test_reward')``.
        """
        saver = ReplaySaver()
        rewards = []
        best_reward = -np.inf
        self.is_test = True
        self.invincible = bool(invincible_mode)

        for i in range(episodes):
            done = False
            state = self.env.reset(resize=RESIZE, size=RESIZE_SIZE)
            # `.float()` instead of `.type(torch.cuda.FloatTensor)`: the CUDA
            # tensor type raises on machines without a GPU, while this gives
            # the same float32 tensor on DEVICE.
            state = torch.from_numpy(state).permute(
                2, 0, 1).unsqueeze(0).float().to(DEVICE)
            saver.get_current_frame()
            episode_reward = 0.0

            # Decide once per game which stop rule applies. `self.t` is only
            # reset and advanced in invincible mode, as other methods of the
            # agent may read it.
            capped = self.invincible
            if capped:
                self.t = 0

            # play one game
            while True:
                if capped:
                    if self.t > test_max_t:
                        break
                    self.t += 1
                elif done:
                    break

                action = self.select_action(state)
                state, reward, done, _ = self.step(action)

                saver.get_current_frame()
                episode_reward += reward
                print('\r episode: {} | Now reward: {} '.format(
                    i + 1, episode_reward),
                      end='\r')

            # Only a strictly better episode replaces the stored best replay;
            # the frame buffer is cleared either way.
            if episode_reward > best_reward:
                best_reward = episode_reward
                saver.save_best()
            saver.reset()

            rewards.append(episode_reward)
        print('Run %d episodes' % (episodes))
        print('Mean:', np.mean(rewards))
        print('Median:', np.median(rewards))
        print('Saving best reward video')
        saver.make_video(path=saving_path, size=size, fps=fps)
        np.save('./checkpoints/test_reward', rewards)
        print('Video saved :)')