def __init__(self):
    """Set up the environment: game backend plus per-episode bookkeeping."""
    super().__init__()
    # Superseded inline space definitions, kept from the original:
    #self.action_space = np.array([0,0])
    #self.observation_space = np.array([0,0,0,0,0,0,0,0,0])
    self.game = BouncyBalls()             # physics simulation backend
    self.step_count = 0                   # steps taken in the current episode
    self.cumulative_action = np.zeros(6)  # accumulated platform placements
    print('created')
def __init__(self):
    """Construct the env and load the pretrained LSTM ball-position predictor."""
    super().__init__()
    # Superseded inline space definitions, kept from the original:
    #self.action_space = np.array([0,0])
    #self.observation_space = np.array([0,0,0,0,0,0,0,0,0])
    self.game = BouncyBalls()
    self.step_count = 0
    #self.cumulative_action = np.zeros(6)
    # Trajectory prediction network; checkpoint is loaded onto whichever
    # device is available and frozen for inference only.
    self.pred_net = LSTM_Init_To_Many_3()
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.pred_net.load_state_dict(
        torch.load('preTrained/CP_epoch30.pth', map_location=self.device))
    self.pred_net.to(device=self.device)
    self.pred_net.eval()
    print('created')
class ball_env_1(Env):
    """
    Ball environment #1

    Single-step episodes: one action places the platforms, the game runs
    to completion, and the episode terminates immediately.
    """

    def __init__(self):
        super().__init__()
        self.action_space = np.zeros(6)       # three platform positions (x, y pairs)
        self.observation_space = np.zeros(1)  # constant placeholder observation
        self.game = BouncyBalls()
        print('created')

    def step(self, action):
        """Run one full game episode for `action`; always terminal."""
        final_posi = self.game.run_one_episode(action)
        reward = self.posi_reward(final_posi)
        # Observation carries no information in this environment.
        return np.zeros(1), reward, True, {}

    def posi_reward(self, posi):
        """Binary reward: 1.0 when the ball ends in the target region."""
        return 1.0 if posi[0] >= 550 and posi[1] > 150 else 0.0

    def reset(self):
        """Nothing to reset; return the constant placeholder observation."""
        return np.zeros(1)
class ball_env_4(Env):
    """
    Ball environment #4

    Up to ten time steps.  At each step the agent either repositions the
    platforms or releases the ball (via action[6] > 0).  No prediction
    model is used.
    """

    # Six platform coordinates + one "let the ball go" flag.
    action_space = np.zeros(6 + 1)
    observation_space = np.zeros(7)

    def __init__(self):
        super().__init__()
        self.game = BouncyBalls()
        self.step_count = 0                   # steps taken this episode
        self.cumulative_action = np.zeros(6)  # last stored platform placements
        print('created')

    def step(self, action):
        """Advance one step.

        Releasing the ball (action[6] > 0) or reaching ten steps runs the
        physics with the previously stored placements and ends the episode.
        Otherwise the action is stored and the episode continues.
        Returns (obs, reward, done, info).
        """
        self.step_count += 1
        if action[6] > 0 or self.step_count >= 10:  # let the ball go
            obs = action
            ball_posi = self.game.run_one_episode(self.cumulative_action)
            reward = self.posi_reward(ball_posi)
            # Releasing on the very first step earns nothing.
            if self.step_count == 1:
                reward = 0
            done = True
            self.step_count = 0
        else:
            # NOTE(review): the full 7-vector (including the release flag)
            # is stored here, matching the original behavior — confirm the
            # game ignores the extra element.
            self.cumulative_action = action
            obs = action
            reward = 0
            done = False
        info = {}
        return obs, reward, done, info

    def posi_reward(self, posi):
        """Binary reward: 1.0 when the ball ends in the target region."""
        reward = 0.0
        if posi[0] >= 550 and posi[1] > 150:
            reward = 1.0
        return reward

    def reset(self):
        """Clear per-episode state and return a zero observation.

        FIX: previously returned the shared class-level `observation_space`
        array (caller mutation would corrupt it for every instance) and left
        `step_count` / `cumulative_action` stale across episodes.
        """
        self.step_count = 0
        self.cumulative_action = np.zeros(6)
        return np.zeros_like(self.observation_space)
class ball_env_3(Env):
    """
    Ball environment #3

    Three time steps, one per platform.  With predicted ball position
    as observation.
    """

    action_space = np.zeros(2)
    # NOTE(review): declared 11-dim, but step() emits a 9-dim observation
    # and reset() returns this 11-dim array — confirm which is intended.
    observation_space = np.zeros(11)

    def __init__(self):
        super().__init__()
        self.game = BouncyBalls()
        self.step_count = 0
        self.cumulative_action = np.zeros(6)
        print('created')

    def step(self, action):
        """Place one platform per step; third step runs the game."""
        self.step_count += 1
        terminal = self.step_count >= 3
        if terminal:  # let the ball go
            final_posi = self.game.run_one_episode(self.cumulative_action)
            reward = self.posi_reward(final_posi)
            # reset per-episode state
            self.step_count = 0
            self.cumulative_action = np.array([0, 0, 0, 0, 0, 0])
        else:
            lo = self.step_count * 2 - 2
            self.cumulative_action[lo:lo + 2] = action
            reward = 0
        # Encode which platforms have been placed; each placed slot also
        # carries the current action (zero slots after a terminal step,
        # since step_count was just reset).
        obs = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])
        for i in range(self.step_count):
            obs[i * 3] = 1
            obs[i * 3 + 1:i * 3 + 3] = action
        return obs, reward, terminal, {}

    def posi_reward(self, posi):
        """Binary reward: 1.0 when the ball ends in the target region."""
        return 1.0 if posi[0] >= 550 and posi[1] > 150 else 0.0

    def reset(self):
        return self.observation_space
class ball_env_6(Env):
    """
    Ball environment #6

    Up to ten time steps.  At each step the agent repositions the platforms
    or releases the ball.  The observation contains the action plus an
    ORACLE ball position obtained by actually simulating the game.
    """

    action_space = np.zeros(6 + 1)       # platform positions + release flag
    observation_space = np.zeros(7 + 2)  # action + ball position

    def __init__(self):
        super().__init__()
        self.game = BouncyBalls()
        self.step_count = 0
        # Pretrained predictor is loaded for parity with ball_env_5, though
        # the oracle observation below does not use it.
        self.pred_net = LSTM_Init_To_Many_3()
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.pred_net.load_state_dict(
            torch.load('preTrained/CP_epoch30.pth', map_location=self.device))
        self.pred_net.to(device=self.device)
        self.pred_net.eval()
        print('created')

    def step(self, action):
        """Advance one step; observation carries the true simulated outcome."""
        self.step_count += 1
        if action[6] > 0 or self.step_count >= 10:  # let the ball go
            ball_posi = self.game.run_one_episode(action)
            reward = self.posi_reward(ball_posi)
            # Releasing on the very first step earns nothing.
            if self.step_count == 1:
                reward = 0
            done = True
            self.step_count = 0
        else:
            reward = 0
            done = False
        # Oracle observation: simulate the game for this action and report
        # the final ball position, normalised to roughly [-1, 1].
        # NOTE(review): reconstructed from flattened source — on terminal
        # steps this runs the simulation a second time; confirm intended.
        ball_posi = self.game.run_one_episode(action)
        last_posi = (ball_posi - np.array([300, 300])) / np.array([300, 300])
        obs = np.zeros(9)
        obs[:7] = action
        obs[7:] = last_posi
        return obs, reward, done, {}

    def posi_reward(self, posi):
        """Binary reward: 1.0 when the ball ends in the target region."""
        return 1.0 if posi[0] >= 550 and posi[1] > 150 else 0.0

    def reset(self):
        return self.observation_space
class ball_env_5(Env):
    """
    Ball environment #5

    Up to ten time steps.  At each step the agent repositions the platforms
    or releases the ball.  The observation contains the action plus the
    LSTM-predicted ball position.
    """

    action_space = np.zeros(6 + 1)       # platform positions + release flag
    observation_space = np.zeros(7 + 2)  # action + predicted ball position

    def __init__(self):
        super().__init__()
        self.game = BouncyBalls()
        self.step_count = 0
        # Pretrained trajectory predictor, frozen for inference.
        self.pred_net = LSTM_Init_To_Many_3()
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.pred_net.load_state_dict(
            torch.load('preTrained/CP_epoch30.pth', map_location=self.device))
        self.pred_net.to(device=self.device)
        self.pred_net.eval()
        print('created')

    def step(self, action):
        """Advance one step; observation carries the predicted outcome."""
        self.step_count += 1
        if action[6] > 0 or self.step_count >= 10:  # let the ball go
            ball_posi = self.game.run_one_episode(action)
            reward = self.posi_reward(ball_posi)
            # Releasing on the very first step earns nothing.
            if self.step_count == 1:
                reward = 0
            done = True
            self.step_count = 0
        else:
            reward = 0
            done = False

        # --- prediction ---
        # Normalisation constants for the six platform coordinates
        # (dataset statistics the LSTM was trained with).
        mean = np.array([30.893, 270.33, 200.388, 199.573, 350.057, 200.53])
        std = np.array([
            14.54288661, 14.70269023, 14.31668453, 14.40488358,
            14.85717843, 15.25080654
        ])
        normalized_platform_posi = (action[:6] - mean) / std
        with torch.no_grad():
            pred_input = torch.from_numpy(
                np.expand_dims(normalized_platform_posi,
                               axis=0)).float().to(self.device)
            pred_output = self.pred_net(pred_input).cpu().numpy()

        def get_pred_ball_posi(pred_output, x_min=20, x_max=550,
                               y_min=50, y_max=550):
            """Return the normalised predicted position at the first step
            where the denormalised trajectory leaves the playfield bounds,
            falling back to the final predicted step."""
            mean = np.array([163.29437530326638, 279.7768839198992])
            std = np.array([138.14349185245848, 113.09608505385799])
            last_posi = pred_output[0, -1]
            denormalized = pred_output * std + mean
            for i in range(pred_output.shape[1]):
                x, y = denormalized[0, i]
                if x < x_min or x > x_max or y < y_min or y > y_max:
                    last_posi = pred_output[0, i]
                    break
            return last_posi

        last_posi = get_pred_ball_posi(pred_output)
        obs = np.zeros(9)
        obs[:7] = action
        obs[7:] = last_posi
        return obs, reward, done, {}

    def posi_reward(self, posi):
        """Binary reward: 1.0 when the ball ends in the target region."""
        return 1.0 if posi[0] >= 550 and posi[1] > 150 else 0.0

    def reset(self):
        return self.observation_space
def __init__(self):
    """Create the game backend and declare placeholder spaces."""
    super().__init__()
    self.action_space = np.zeros(6)       # three platform positions (x, y pairs)
    self.observation_space = np.zeros(1)  # dummy scalar observation
    self.game = BouncyBalls()
    print('created')