class Paddle(RawEnvironment):
    '''
    A fake environment that pretends that the paddle partition has been
    solved: it exposes three actions (no-op, and the two paddle moves)
    that are translated into the underlying Screen's action space.
    '''

    def __init__(self, frameskip=1):
        # Three abstract actions: 0 = no-op, 1 -> screen action 2, 2 -> screen action 3.
        self.num_actions = 3
        self.itr = 0
        self.save_path = ""
        self.screen = Screen(frameskip=frameskip)
        self.reward = 0
        self.episode_rewards = self.screen.episode_rewards

    def set_save(self, itr, save_dir, recycle, all_dir=""):
        """Configure save bookkeeping on both this wrapper and the wrapped screen.

        Creates `save_dir` if it does not already exist (existing dirs are fine).
        """
        self.save_path = save_dir
        self.itr = itr
        self.recycle = recycle
        self.screen.save_path = save_dir
        self.screen.itr = itr
        self.screen.recycle = recycle
        self.all_dir = all_dir
        try:
            os.makedirs(save_dir)
        except OSError:
            # Directory already exists (or cannot be created) — best effort.
            pass

    def _remap_action(self, factor_state):
        """Remap the screen's raw Action value (in [0..3]) back into this
        environment's 3-action space, in place on `factor_state`.

        Values < 2 collapse to 0; 2 -> 1; 3 -> 2. Any other value is left
        untouched (matching the original branch structure).
        """
        # assumes factor_state["Action"] is (position, (value, ...)) — TODO confirm
        if factor_state["Action"][1][0] < 2:
            factor_state["Action"] = (factor_state["Action"][0], 0)
        elif factor_state["Action"][1][0] == 2:
            factor_state["Action"] = (factor_state["Action"][0], 1)
        elif factor_state["Action"][1][0] == 3:
            factor_state["Action"] = (factor_state["Action"][0], 2)
        return factor_state

    def step(self, action):
        """Translate the 3-action space into the screen's action space,
        advance the screen one step, and return (raw_state, factor_state, done).
        """
        # TODO: action is a tensor, might not be a safe assumption.
        # Clone so the caller's tensor is not mutated.
        action = action.clone()
        if action == 1:
            action[0] = 2
        elif action == 2:
            action[0] = 3
        raw_state, factor_state, done = self.screen.step(action, render=True)
        self.reward = self.screen.reward
        factor_state = self._remap_action(factor_state)
        return raw_state, factor_state, done

    def getState(self):
        """Return (raw_state, factor_state) with the Action factor remapped
        into this environment's 3-action space."""
        raw_state, factor_state = self.screen.getState()
        factor_state = self._remap_action(factor_state)
        return raw_state, factor_state
class Ball(RawEnvironment):
    '''
    A fake environment that pretends that the paddle partition has been
    solved: it plans a target paddle location by simulating the ball
    forward on an internal copy of the screen.
    '''

    def __init__(self):
        self.num_actions = 4
        self.itr = 0
        self.save_path = ""
        self.screen = Screen()
        # Fix: original read the undefined global name `screen` (NameError);
        # the intent is clearly a deep copy of this instance's screen.
        self.internal_screen = copy.deepcopy(self.screen)

    def step(self, action):
        """Advance the environment one step.

        Actions 1 and 2 are translated to the screen's actions 2 and 3.
        When the ball is moving downward and close enough, simulate it
        forward on an internal screen copy to compute `objective_location`
        (the y-position the paddle should reach, jittered by +/-1).

        Returns (raw_state, factor_state, done) like the sibling Paddle
        environment (the original fell off the end without returning).
        """
        if action == 1:
            action = 2
        elif action == 2:
            action = 3
        raw_state, factor_state = self.screen.getState()
        ball = factor_state["Ball"][0]
        ball_vel = self.screen.ball.vel
        if ball_vel[0] < 0 or ball[0] > 60:
            # Ball is too far or moving up, so we don't care where it is.
            # TODO: follow the ball
            # Fix: original branch held only comments, which is a SyntaxError.
            pass
        else:
            self.internal_screen = copy.deepcopy(self.screen)
            # Roll the internal copy forward (no-op actions) until the ball
            # reaches the paddle row, then target its y-position.
            while self.internal_screen.ball.pos[0] < 71:
                self.internal_screen.step([0])
            self.objective_location = (self.internal_screen.ball.pos[1]
                                       + np.random.choice([-1, 0, 1]))
        paddle = factor_state["Paddle"][0]
        raw_state, factor_state, done = self.screen.step(action)
        # Remap the raw Action back into the reduced action space. Fix: the
        # original item-assigned into factor_state["Action"], which the
        # sibling Paddle class shows is a tuple (immutable) — replace the
        # whole tuple instead, preserving the position component.
        if factor_state["Action"][1] < 2:
            factor_state["Action"] = (factor_state["Action"][0], 0)
        elif factor_state["Action"][1] == 2:
            factor_state["Action"] = (factor_state["Action"][0], 1)
        elif factor_state["Action"][1] == 3:
            factor_state["Action"] = (factor_state["Action"][0], 2)
        return raw_state, factor_state, done

    def getState(self):
        """Return (raw_state, factor_state) with the Action factor remapped
        (the original computed this but never returned it)."""
        raw_state, factor_state = self.screen.getState()
        if factor_state["Action"][1] < 2:
            factor_state["Action"] = (factor_state["Action"][0], 0)
        elif factor_state["Action"][1] == 2:
            factor_state["Action"] = (factor_state["Action"][0], 1)
        elif factor_state["Action"][1] == 3:
            factor_state["Action"] = (factor_state["Action"][0], 2)
        return raw_state, factor_state
class FocusEnvironment(RawEnvironment):
    '''
    An environment wrapper that runs a focus model over the raw screen
    image to produce object factor states, logging them to
    "focus_dumps.txt" on every step.
    '''

    def __init__(self, focus_model):
        self.num_actions = 4
        self.itr = 0
        self.save_path = ""
        self.screen = Screen()
        self.focus_model = focus_model
        # Cached factor state from the last step; None until first use.
        self.factor_state = None
        self.reward = 0
        # self.focus_model.cuda()

    def set_save(self, itr, save_dir, recycle):
        """Configure save bookkeeping on both this wrapper and the wrapped screen.

        Creates `save_dir` if it does not already exist (existing dirs are fine).
        """
        self.save_path = save_dir
        self.itr = itr
        self.recycle = recycle
        self.screen.save_path = save_dir
        self.screen.itr = itr
        self.screen.recycle = recycle
        try:
            os.makedirs(save_dir)
        except OSError:
            # Directory already exists (or cannot be created) — best effort.
            pass

    def _compute_factor_state(self, raw_state, raw_factor_state):
        """Run the focus model on `raw_state` and return a factor-state dict.

        Model outputs are scaled by 84 (the screen edge length, presumably —
        TODO confirm) and packaged as (position, (1.0,)) tuples; the Action
        factor is passed through from the raw factor state.
        """
        factor_state = self.focus_model.forward(
            pytorch_model.wrap(raw_state, cuda=False).unsqueeze(0).unsqueeze(0),
            ret_numpy=True)
        for key in factor_state.keys():
            factor_state[key] *= 84
            factor_state[key] = (np.squeeze(factor_state[key]), (1.0, ))
        factor_state['Action'] = raw_factor_state['Action']
        return factor_state

    def step(self, action):
        """Step the screen, compute the focus-model factor state, append it
        to focus_dumps.txt, and return (raw_state, factor_state, done)."""
        # TODO: action is a tensor, might not be a safe assumption.
        t = time.time()
        raw_state, raw_factor_state, done = self.screen.step(action, render=True)
        self.reward = self.screen.reward
        factor_state = self._compute_factor_state(raw_state, raw_factor_state)
        self.factor_state = factor_state
        # Truncate on the first iteration, append afterwards.
        mode = 'a' if self.screen.itr != 0 else 'w'
        # Fix: original opened the file without ever closing it (handle leak);
        # the context manager guarantees the dump is flushed and closed.
        with open(os.path.join(self.save_path, "focus_dumps.txt"), mode) as object_dumps:
            for key in factor_state.keys():
                # TODO: attributes are limited to single floats
                object_dumps.write(
                    key + ":" + " ".join([str(fs) for fs in factor_state[key]]) + "\t")
            object_dumps.write("\n")  # TODO: recycling does not stop object dumping
        # print("elapsed ", time.time() - t)
        return raw_state, factor_state, done

    def getState(self):
        """Return (raw_state, factor_state), computing the factor state from
        the focus model only if no cached value exists yet."""
        raw_state, raw_factor_state = self.screen.getState()
        if self.factor_state is None:
            self.factor_state = self._compute_factor_state(raw_state, raw_factor_state)
        return raw_state, self.factor_state