Example no. 1
    def __init__(self):
        super().__init__()
        # self.action_space = np.array([0, 0])
        # self.observation_space = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])
        self.game = BouncyBalls()             # physics game used to roll out episodes
        self.step_count = 0                   # steps taken in the current episode
        self.cumulative_action = np.zeros(6)  # platform positions placed so far
        print('created')
Example no. 2
    def __init__(self):
        super().__init__()
        # self.action_space = np.array([0, 0])
        # self.observation_space = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])
        self.game = BouncyBalls()
        self.step_count = 0
        # self.cumulative_action = np.zeros(6)
        # Trajectory prediction network, loaded from a pre-trained checkpoint
        # and kept in eval mode (inference only)
        self.pred_net = LSTM_Init_To_Many_3()
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.pred_net.load_state_dict(
            torch.load('preTrained/CP_epoch30.pth', map_location=self.device))
        self.pred_net.to(device=self.device)
        self.pred_net.eval()
        print('created')
Example no. 3
class ball_env_1(Env):
    """
    Ball environment #1
    Only one time step.
    """
    def __init__(self):
        super().__init__()
        self.action_space = np.zeros(6)
        self.observation_space = np.zeros(1)
        self.game = BouncyBalls()
        print('created')

    def step(self, action):
        obs = np.zeros(1)
        ball_posi = self.game.run_one_episode(action)
        reward = self.posi_reward(ball_posi)
        done = True
        info = {}
        #print(reward)
        return obs, reward, done, info

    def posi_reward(self, posi):
        reward = 0.0
        if posi[0] >= 550 and posi[1] > 150:
            reward = 1.0
        return reward

    def reset(self):
        obs = np.zeros(1)
        return obs
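
A minimal usage sketch for ball_env_1, assuming BouncyBalls and the Env base class are importable from this project. The 0-600 range for the platform coordinates is an assumption about the game window, not something the class enforces.

import numpy as np

# Hypothetical single-episode rollout with ball_env_1: one random action,
# one call to step(), and done is always True afterwards.
env = ball_env_1()
obs = env.reset()
action = np.random.uniform(low=0, high=600, size=6)  # (x, y) for three platforms (assumed range)
obs, reward, done, info = env.step(action)
print('reward:', reward, 'done:', done)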
Example no. 4
class ball_env_4(Env):
    """
    Ball environment #4
    Ten time steps. At each time step, change position or let the ball go. No prediction model.
    """
    action_space = np.zeros(6 + 1)  # positions for three platforms + "let the ball go" flag
    observation_space = np.zeros(7)

    def __init__(self):
        super().__init__()
        #self.action_space = np.array([0,0])
        #self.observation_space = np.array([0,0,0,0,0,0,0,0,0])
        self.game = BouncyBalls()
        self.step_count = 0
        self.cumulative_action = np.zeros(6)
        print('created')

    def step(self, action):
        self.step_count += 1

        if action[6] > 0 or self.step_count >= 10:
            # let the ball go
            obs = action
            ball_posi = self.game.run_one_episode(self.cumulative_action)

            reward = self.posi_reward(ball_posi)
            # special case for letting the ball go on the first step
            if self.step_count == 1:
                reward = 0
            done = True

            # reset
            self.step_count = 0
            #self.cumulative_action = np.array([0,0,0,0,0,0])
        else:
            self.cumulative_action = action
            obs = action
            reward = 0
            done = False

        #obs = np.array([0,0,0,0,0,0,0,0,0])
        #for i in range(self.step_count):
        #    obs[i*3] = 1
        #    obs[i*3+1:i*3+3] = action
        info = {}
        #print(reward)
        return obs, reward, done, info

    def posi_reward(self, posi):
        reward = 0.0
        if posi[0] >= 550 and posi[1] > 150:
            reward = 1.0
        return reward

    def reset(self):
        obs = self.observation_space
        return obs
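
A sketch of how an agent might interact with ball_env_4 over several steps: keep the release flag (action[6]) at zero while placing platforms, then set it to let the ball go. The coordinate range and the choice to release on the third step are assumptions for illustration.

import numpy as np

# Hypothetical interaction loop for ball_env_4.
env = ball_env_4()
obs = env.reset()
done = False
step = 0
while not done:
    action = np.zeros(7)
    action[:6] = np.random.uniform(0, 600, size=6)  # platform positions (assumed range)
    action[6] = 1.0 if step >= 2 else 0.0           # release the ball on the third step
    obs, reward, done, info = env.step(action)
    step += 1
print('final reward:', reward)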
Example no. 5
class ball_env_3(Env):
    """
    Ball environment #3
    Three time steps, each for one platform. With predicted ball position as observation.
    """
    action_space = np.zeros(2)
    observation_space = np.zeros(11)

    def __init__(self):
        super().__init__()
        #self.action_space = np.array([0,0])
        #self.observation_space = np.array([0,0,0,0,0,0,0,0,0])
        self.game = BouncyBalls()
        self.step_count = 0
        self.cumulative_action = np.zeros(6)
        print('created')

    def step(self, action):
        self.step_count += 1

        if self.step_count >= 3:
            # let the ball go
            ball_posi = self.game.run_one_episode(self.cumulative_action)
            reward = self.posi_reward(ball_posi)
            done = True
            # reset
            self.step_count = 0
            self.cumulative_action = np.array([0, 0, 0, 0, 0, 0])
        else:
            # write this step's (x, y) into the slot for the current platform
            start = (self.step_count - 1) * 2
            self.cumulative_action[start:start + 2] = action
            reward = 0
            done = False

        obs = np.zeros(9)  # [placed_flag, x, y] for each of the three platform slots
        for i in range(self.step_count):
            obs[i * 3] = 1
            obs[i * 3 + 1:i * 3 + 3] = action
        info = {}
        #print(reward)
        return obs, reward, done, info

    def posi_reward(self, posi):
        reward = 0.0
        if posi[0] >= 550 and posi[1] > 150:
            reward = 1.0
        return reward

    def reset(self):
        obs = self.observation_space
        return obs
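
The observation built in ball_env_3.step packs three entries per platform slot: a "placed" flag followed by an (x, y) pair. A small standalone sketch of that layout, with a hypothetical action and step count, shows the vector after two platforms have been placed (note that the loop reuses the latest action for every slot):

import numpy as np

# Illustration of ball_env_3's observation layout after two steps.
action = np.array([0.4, 0.7])           # hypothetical (x, y) for the current platform
step_count = 2
obs = np.zeros(9)
for i in range(step_count):
    obs[i * 3] = 1                      # mark slot i as placed
    obs[i * 3 + 1:i * 3 + 3] = action   # same (latest) action copied into each slot
print(obs)  # [1.  0.4 0.7 1.  0.4 0.7 0.  0.  0. ]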
Example no. 6
class ball_env_6(Env):
    """
    Ball environment #6
    Ten time steps. At each time step, change position or let the ball go. With ORACLE predicted ball position as observation.
    """
    action_space = np.zeros(6 + 1)  # positions for three platforms + "let the ball go" flag
    observation_space = np.zeros(7 + 2)  # action + predicted ball position

    def __init__(self):
        super().__init__()
        #self.action_space = np.array([0,0])
        #self.observation_space = np.array([0,0,0,0,0,0,0,0,0])
        self.game = BouncyBalls()
        self.step_count = 0
        #self.cumulative_action = np.zeros(6)
        self.pred_net = LSTM_Init_To_Many_3()
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.pred_net.load_state_dict(
            torch.load('preTrained/CP_epoch30.pth', map_location=self.device))
        self.pred_net.to(device=self.device)
        self.pred_net.eval()
        print('created')

    def step(self, action):
        self.step_count += 1

        if action[6] > 0 or self.step_count >= 10:
            # let the ball go
            ball_posi = self.game.run_one_episode(action)
            reward = self.posi_reward(ball_posi)
            # special case for letting the ball go on the first step
            if self.step_count == 1:
                reward = 0
            done = True
            # reset
            self.step_count = 0
            #self.cumulative_action = np.array([0,0,0,0,0,0])
        else:
            #self.cumulative_action[self.step_count*2-2:self.step_count*2] = action
            reward = 0
            done = False

        # ORACLE observation: run the game with the chosen platform positions
        # and rescale the true final ball position to roughly [-1, 1]
        ball_posi = self.game.run_one_episode(action)
        last_posi = (ball_posi - np.array([300, 300])) / np.array([300, 300])

        obs = np.zeros(9)
        obs[:7] = action
        obs[7:] = last_posi
        info = {}
        #print(reward)
        return obs, reward, done, info

    def posi_reward(self, posi):
        reward = 0.0
        if posi[0] >= 550 and posi[1] > 150:
            reward = 1.0
        return reward

    def reset(self):
        obs = self.observation_space
        return obs
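
In ball_env_6 the last two entries of the observation are the true final ball position, rescaled from pixel coordinates to roughly [-1, 1] under the assumption of a window about 600 pixels wide and tall. A standalone sketch of that rescaling with a made-up final position:

import numpy as np

# Hypothetical final ball position in pixel coordinates.
ball_posi = np.array([550.0, 160.0])
# Same rescaling used in ball_env_6.step: shift by the window centre (300, 300)
# and divide by the half-size, giving values roughly in [-1, 1].
normalized = (ball_posi - np.array([300, 300])) / np.array([300, 300])
print(normalized)  # [ 0.8333... -0.4666...]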
Example no. 7
class ball_env_5(Env):
    """
    Ball environment #5
    Ten time steps. At each time step, change position or let the ball go. With predicted ball position as observation.
    """
    action_space = np.zeros(6 + 1)  # positions for three platforms + "let the ball go" flag
    observation_space = np.zeros(7 + 2)  # action + predicted ball position

    def __init__(self):
        super().__init__()
        #self.action_space = np.array([0,0])
        #self.observation_space = np.array([0,0,0,0,0,0,0,0,0])
        self.game = BouncyBalls()
        self.step_count = 0
        #self.cumulative_action = np.zeros(6)
        self.pred_net = LSTM_Init_To_Many_3()
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.pred_net.load_state_dict(
            torch.load('preTrained/CP_epoch30.pth', map_location=self.device))
        self.pred_net.to(device=self.device)
        self.pred_net.eval()
        print('created')

    def step(self, action):
        self.step_count += 1

        if action[6] > 0 or self.step_count >= 10:
            # let the ball go
            ball_posi = self.game.run_one_episode(action)
            reward = self.posi_reward(ball_posi)
            # special case for letting the ball go on the first step
            if self.step_count == 1:
                reward = 0
            done = True
            # reset
            self.step_count = 0
            #self.cumulative_action = np.array([0,0,0,0,0,0])
        else:
            #self.cumulative_action[self.step_count*2-2:self.step_count*2] = action
            reward = 0
            done = False

        # Prediction: standardize the six platform coordinates with fixed
        # per-dimension statistics, then run the trajectory network.
        mean = np.array([30.893, 270.33, 200.388, 199.573, 350.057, 200.53])
        std = np.array([
            14.54288661, 14.70269023, 14.31668453, 14.40488358, 14.85717843,
            15.25080654
        ])
        normalized_platform_posi = (action[:6] - mean) / std
        with torch.no_grad():
            pred_input = torch.from_numpy(
                np.expand_dims(normalized_platform_posi, axis=0)
            ).float().to(self.device)
            pred_output = self.pred_net(pred_input).cpu().numpy()

        def get_pred_ball_posi(pred_output,
                               x_min=20, x_max=550, y_min=50, y_max=550):
            # Denormalize the predicted trajectory; return the last point, or
            # the first point that leaves the window if the trajectory exits.
            mean = np.array([163.29437530326638, 279.7768839198992])
            std = np.array([138.14349185245848, 113.09608505385799])
            last_posi = pred_output[0, -1]
            pred_output_denormalized = pred_output * std + mean
            for i in range(pred_output.shape[1]):
                x, y = pred_output_denormalized[0, i]
                if x < x_min or x > x_max or y < y_min or y > y_max:
                    last_posi = pred_output[0, i]
                    break
            return last_posi

        last_posi = get_pred_ball_posi(pred_output)

        obs = np.zeros(9)
        obs[:7] = action
        obs[7:] = last_posi
        info = {}
        #print(reward)
        return obs, reward, done, info

    def posi_reward(self, posi):
        reward = 0.0
        if posi[0] >= 550 and posi[1] > 150:
            reward = 1.0
        return reward

    def reset(self):
        obs = self.observation_space
        return obs
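
ball_env_5 standardizes the six platform coordinates with fixed per-dimension statistics before calling the LSTM, then denormalizes the predicted trajectory and stops at the first point that leaves the window. A standalone sketch of that denormalize-and-truncate step, mirroring the nested get_pred_ball_posi helper but using a made-up prediction array in place of the network output:

import numpy as np

# Hypothetical network output: shape (1, T, 2), normalized (x, y) per time step.
pred_output = np.array([[[-1.0, -1.5], [3.5, 0.0], [0.5, 0.5]]])

mean = np.array([163.29437530326638, 279.7768839198992])
std = np.array([138.14349185245848, 113.09608505385799])

def get_pred_ball_posi(pred_output, x_min=20, x_max=550, y_min=50, y_max=550):
    # Denormalize to pixel coordinates; return the last predicted point, or the
    # first out-of-bounds point if the trajectory leaves the window earlier.
    denorm = pred_output * std + mean
    last_posi = pred_output[0, -1]
    for i in range(pred_output.shape[1]):
        x, y = denorm[0, i]
        if x < x_min or x > x_max or y < y_min or y > y_max:
            last_posi = pred_output[0, i]
            break
    return last_posi

print(get_pred_ball_posi(pred_output))  # second point: its x denormalizes past x_max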
Example no. 8
    def __init__(self):
        super().__init__()
        self.action_space = np.zeros(6)
        self.observation_space = np.zeros(1)
        self.game = BouncyBalls()
        print('created')