import copy
import glob
import os
import time

import numpy as np

# Screen, RawEnvironment, pytorch_model, and the other project-specific names
# used below are assumed to be imported from the surrounding repository.


class Paddle(RawEnvironment):
    '''
    A fake environment that pretends that the paddle partition has been
    solved; it exposes three actions that produce the desired paddle behavior.
    '''
    def __init__(self, frameskip=1):
        self.num_actions = 3
        self.itr = 0
        self.save_path = ""
        self.screen = Screen(frameskip=frameskip)
        self.reward = 0
        self.episode_rewards = self.screen.episode_rewards

    def set_save(self, itr, save_dir, recycle, all_dir=""):
        self.save_path = save_dir
        self.itr = itr
        self.recycle = recycle
        self.screen.save_path = save_dir
        self.screen.itr = itr
        self.screen.recycle = recycle
        self.all_dir = all_dir
        try:
            os.makedirs(save_dir)
        except OSError:
            pass

    def step(self, action):
        # TODO: action is a tensor, which might not be a safe assumption
        action = action.clone()
        # remap the reduced 3-action space {0, 1, 2} onto the screen's native
        # actions {0 (noop), 2, 3}
        if action == 1:
            action[0] = 2
        elif action == 2:
            action[0] = 3
        raw_state, factor_state, done = self.screen.step(action, render=True)
        self.reward = self.screen.reward
        # map the native action index back into the reduced action space,
        # rebuilding the tuple since tuples are immutable
        if factor_state["Action"][1][0] < 2:
            factor_state["Action"] = (factor_state["Action"][0], 0)
        elif factor_state["Action"][1][0] == 2:
            factor_state["Action"] = (factor_state["Action"][0], 1)
        elif factor_state["Action"][1][0] == 3:
            factor_state["Action"] = (factor_state["Action"][0], 2)
        return raw_state, factor_state, done

    def getState(self):
        raw_state, factor_state = self.screen.getState()
        # apply the same native-to-reduced action remapping as in step()
        if factor_state["Action"][1][0] < 2:
            factor_state["Action"] = (factor_state["Action"][0], 0)
        elif factor_state["Action"][1][0] == 2:
            factor_state["Action"] = (factor_state["Action"][0], 1)
        elif factor_state["Action"][1][0] == 3:
            factor_state["Action"] = (factor_state["Action"][0], 2)
        return raw_state, factor_state
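# A minimal usage sketch for the reduced-action wrapper above, assuming torch
# is available and that Screen.step accepts a length-1 tensor (as the
# remapping in Paddle.step implies). The demo function and the random policy
# are hypothetical, for illustration only.
def _paddle_random_rollout_demo(steps=100):
    import torch
    env = Paddle(frameskip=1)
    for _ in range(steps):
        # sample from the reduced 3-action space; step() remaps 1 -> 2, 2 -> 3
        action = torch.tensor([np.random.randint(env.num_actions)])
        raw_state, factor_state, done = env.step(action)
        # after step(), factor_state["Action"][1] lies in {0, 1, 2}
    return env.episode_rewards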
class Ball(RawEnvironment):
    '''
    A fake environment that pretends that the paddle partition has been
    solved; it exposes four actions that produce the desired behavior.
    '''
    def __init__(self):
        self.num_actions = 4
        self.itr = 0
        self.save_path = ""
        self.screen = Screen()
        self.internal_screen = copy.deepcopy(self.screen)

    def step(self, action):
        # remap the reduced action space onto the screen's native actions
        if action == 1:
            action = 2
        elif action == 2:
            action = 3
        raw_state, factor_state = self.screen.getState()
        ball = factor_state["Ball"][0]
        ball_vel = self.screen.ball.vel
        if ball_vel[0] < 0 or ball[0] > 60:
            # ball is too far away or moving up, so we don't care where it is
            # TODO: follow the ball
            pass
        else:
            # forward-simulate a copy of the screen with no-ops until the
            # ball reaches the paddle row, then target that column with one
            # pixel of random jitter
            self.internal_screen = copy.deepcopy(self.screen)
            while self.internal_screen.ball.pos[0] < 71:
                self.internal_screen.step([0])
            self.objective_location = self.internal_screen.ball.pos[1] + np.random.choice([-1, 0, 1])
        paddle = factor_state["Paddle"][0]
        raw_state, factor_state, done = self.screen.step(action)
        # map the native action index back into the reduced action space,
        # rebuilding the tuple since tuples are immutable
        if factor_state["Action"][1] < 2:
            factor_state["Action"] = (factor_state["Action"][0], 0)
        elif factor_state["Action"][1] == 2:
            factor_state["Action"] = (factor_state["Action"][0], 1)
        elif factor_state["Action"][1] == 3:
            factor_state["Action"] = (factor_state["Action"][0], 2)
        return raw_state, factor_state, done

    def getState(self):
        raw_state, factor_state = self.screen.getState()
        if factor_state["Action"][1] < 2:
            factor_state["Action"] = (factor_state["Action"][0], 0)
        elif factor_state["Action"][1] == 2:
            factor_state["Action"] = (factor_state["Action"][0], 1)
        elif factor_state["Action"][1] == 3:
            factor_state["Action"] = (factor_state["Action"][0], 2)
        return raw_state, factor_state
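# Ball.step predicts where the ball will cross the paddle row by forward-
# simulating a deep copy of the screen with no-op actions. A standalone
# sketch of that idea; paddle_row=71 matches the constant used above, and the
# function name is hypothetical.
def _predict_ball_landing_column(screen, paddle_row=71):
    # copy the whole environment so the real episode is left untouched
    sim = copy.deepcopy(screen)
    # advance the copy with no-ops until the ball descends to the paddle row
    while sim.ball.pos[0] < paddle_row:
        sim.step([0])
    # add one pixel of jitter so repeated bounces are not perfectly identical
    return sim.ball.pos[1] + np.random.choice([-1, 0, 1])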
model.add_model('Ball', ball_model, ['Paddle'], augment_pt=f)  # ,augment_pt=util.JumpFiltering(2, 0.05))
####
if args.true_environment:
    model = None
print(args.true_environment, args.env)
if args.env == 'SelfPusher':
    if args.true_environment:
        true_environment = Pushing(pushgripper=True, frameskip=args.frameskip)
    else:
        true_environment = None  # TODO: implement
elif args.env == 'SelfBreakout':
    if args.true_environment:
        true_environment = Screen(frameskip=args.frameskip)
    else:
        true_environment = FocusEnvironment(model, display=args.display_focus)
elif args.env.find('Atari') != -1:
    true_environment = FocusAtariEnvironment(model, args.env[len("Atari"):], args.seed, 0, args.save_dir)
dataset_path = args.record_rollouts
changepoint_path = args.changepoint_dir
option_chain = OptionChain(true_environment, args.changepoint_dir, args.train_edge, args)
reward_paths = glob.glob(os.path.join(option_chain.save_dir, "*rwd.pkl"))
print(reward_paths)
reward_paths.sort(key=lambda x: int(x.split("__")[2]))
# train-edge
# state-forms
# state-names
# Example usage:
# python paddle_bounce.py --model-form tab --optimizer-form TabQ --record-rollouts "data/action/" --train-edge "Paddle->Ball" --num-stack 1 --train --num-iters 100000 --save-dir data/paddleballtest --state-forms prox --state-names Paddle --base-node Paddle --changepoint-dir data/paddlegraph --factor 8 --greedy-epsilon .2 --lr .01 --normalize --behavior-policy egq --gamma .99 > out.txt
# python paddle_bounce.py --model-form fourier --optimizer-form SARSA --record-rollouts "data/action/" --train-edge "Paddle->Ball" --num-stack 2 --train --num-iters 100000 --save-dir data/paddleballpg --state-forms xprox --state-names Paddle --base-node Paddle --changepoint-dir data/paddlegraphpg --factor 10 --num-layers 1 --greedy-epsilon .1 --lr .001 --normalize --behavior-policy egq --save-dir data/xstates/ --optim base > out.txt
# python dopamine_paddle.py --record-rollouts data/integrationpaddle --changepoint-dir data/dopegraph --model-form rainbow --true-environment --train-edge "Action->Reward" --state-forms raw --state-names Action --num-steps 5 --num-stack 4 --num-iters 2000000 --log-interval 200 --save-dir ../datasets/caleb_data/dopamine/rainbow/ --optim base > baselines/rainbow.txt
# python dopamine_paddle.py --record-rollouts data/extragripper --changepoint-dir data/dopepushgraph --model-form rainbow --true-environment --train-edge "Action->Reward" --state-forms raw --state-names Action --num-steps 5 --num-stack 4 --num-iters 10000000 --log-interval 200 --save-dir ../datasets/caleb_data/dopamine/rainbowpushing/ --optim base --env SelfPusher > pushingrainbow.txt
# python dopamine_paddle.py --record-rollouts data/extragripper --changepoint-dir data/dopepushgraph --model-form rainbow --true-environment --train-edge "Action->Reward" --state-forms bounds bounds bounds prox prox --state-names Gripper Block Target Gripper__Block Block__Target --num-steps 5 --num-stack 1 --num-iters 10000000 --log-interval 200 --save-dir ../datasets/caleb_data/dopamine/rainbowpushing/ --optim base --env SelfPusher --gpu 3 --frameskip 3 --normalize --reward-form rawdist > pushingrainbowstate.txt
args = get_args()
# true_environment = Paddle()
# true_environment = PaddleNoBlocks()
if args.env == "SelfPusher":
    true_environment = Pushing(True, frameskip=args.frameskip)
else:
    true_environment = Screen()
dataset_path = args.record_rollouts
changepoint_path = args.changepoint_dir
option_chain = OptionChain(true_environment, args.changepoint_dir, args.train_edge, args)
if args.reward_form == 'rawdist' and args.env == 'SelfPusher':
    true_environment.use_distance_reward()
    args.reward_form = 'raw'
head, tail = get_edge(args.train_edge)
reward_classes = [BlockReward(args)]
if args.reward_form == 'x':
    reward_classes = [Xreward(args)]
elif args.reward_form.find('move_dirall') != -1:
class FocusEnvironment(RawEnvironment):
    '''
    An environment wrapper that runs a trained focus model over the raw
    screen to produce the factored object state.
    '''
    def __init__(self, focus_model):
        self.num_actions = 4
        self.itr = 0
        self.save_path = ""
        self.screen = Screen()
        self.focus_model = focus_model
        self.factor_state = None
        self.reward = 0
        # self.focus_model.cuda()

    def set_save(self, itr, save_dir, recycle):
        self.save_path = save_dir
        self.itr = itr
        self.recycle = recycle
        self.screen.save_path = save_dir
        self.screen.itr = itr
        self.screen.recycle = recycle
        try:
            os.makedirs(save_dir)
        except OSError:
            pass

    def step(self, action):
        # TODO: action is a tensor, which might not be a safe assumption
        t = time.time()
        raw_state, raw_factor_state, done = self.screen.step(action, render=True)
        self.reward = self.screen.reward
        factor_state = self.focus_model.forward(
            pytorch_model.wrap(raw_state, cuda=False).unsqueeze(0).unsqueeze(0),
            ret_numpy=True)
        for key in factor_state.keys():
            # rescale the normalized focus outputs to 84x84 pixel coordinates
            factor_state[key] *= 84
            factor_state[key] = (np.squeeze(factor_state[key]), (1.0, ))
        factor_state['Action'] = raw_factor_state['Action']
        self.factor_state = factor_state
        if self.screen.itr != 0:
            object_dumps = open(os.path.join(self.save_path, "focus_dumps.txt"), 'a')
        else:
            # create the file if it does not exist
            object_dumps = open(os.path.join(self.save_path, "focus_dumps.txt"), 'w')
        for key in factor_state.keys():
            # TODO: attributes are limited to single floats
            object_dumps.write(key + ":" + " ".join([str(fs) for fs in factor_state[key]]) + "\t")
        object_dumps.write("\n")  # TODO: recycling does not stop object dumping
        object_dumps.close()
        # print("elapsed ", time.time() - t)
        return raw_state, factor_state, done

    def getState(self):
        raw_state, raw_factor_state = self.screen.getState()
        if self.factor_state is None:
            factor_state = self.focus_model.forward(
                pytorch_model.wrap(raw_state, cuda=False).unsqueeze(0).unsqueeze(0),
                ret_numpy=True)
            for key in factor_state.keys():
                factor_state[key] *= 84
                factor_state[key] = (np.squeeze(factor_state[key]), (1.0, ))
            factor_state['Action'] = raw_factor_state['Action']
            self.factor_state = factor_state
        factor_state = self.factor_state
        return raw_state, factor_state
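# A minimal usage sketch (hypothetical): wrap a trained focus model so
# downstream code sees factored object positions instead of raw pixels. The
# save directory, recycle value, and list-encoded random actions (matching
# the `step([0])` convention used in Ball) are placeholders for illustration.
def _focus_environment_demo(focus_model, steps=10):
    env = FocusEnvironment(focus_model)
    env.set_save(0, "data/focus_demo", recycle=-1)
    for _ in range(steps):
        raw_state, factor_state, done = env.step([np.random.randint(env.num_actions)])
        # every non-Action entry is (position scaled to the 84x84 frame, (1.0,))
    return env.factor_state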