def _enjoy():
    """Roll out a trained DDPG policy in the Duckietown env, rendering forever."""
    env = launch_env()
    print("Initialized environment")

    # Observation/action wrappers; ImgWrapper reshapes 160x120x3 -> 3x160x120.
    for wrapper in (ResizeWrapper, NormalizeWrapper, ImgWrapper,
                    ActionWrapper, DtRewardWrapper):
        env = wrapper(env)
    print("Initialized Wrappers")

    state_dim = env.observation_space.shape
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    # Load the CNN-based DDPG policy from disk.
    policy = DDPG(state_dim, action_dim, max_action, net_type="cnn")
    policy.load(filename='ddpg', directory='reinforcement/pytorch/models/')

    obs = env.reset()
    done = False
    # Outer loop restarts a fresh episode each time the inner one terminates.
    while True:
        while not done:
            action = policy.predict(np.array(obs))
            obs, reward, done, _ = env.step(action)
            env.render()
        done = False
        obs = env.reset()
def _enjoyWindow():
    """Run the frame-stacking ("window") imitation model in the env, forever.

    Maintains a rolling stack of the last four 3-channel observations
    (12 x 160 x 120) as the model input, steps the env with the predicted
    action, and renders each frame. Restarts the episode when it ends.
    """
    model = WindowModel(action_dim=2, max_action=1.)
    try:
        state_dict = torch.load('./models/windowimitate.pt')
        model.load_state_dict(state_dict)
    except (OSError, RuntimeError) as e:
        # Narrowed from a bare `except:`: covers a missing/unreadable
        # checkpoint (OSError) and an architecture mismatch (RuntimeError),
        # and reports the actual cause instead of swallowing it.
        print('failed to load model: {}'.format(e))
        exit()
    model.eval().to(device)

    env = launch_env1()
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)

    obs = env.reset()
    # Rolling window: 4 stacked RGB frames = 12 channels.
    obsWindow = np.zeros((12, 160, 120))

    while True:
        # Shift the window back one frame and append the newest observation.
        obsWindow[:9, :, :] = obsWindow[3:, :, :]
        obsWindow[9:12, :, :] = obs
        obs = torch.from_numpy(obsWindow).float().to(device).unsqueeze(0)
        # Inference only: no_grad avoids building an unused autograd graph.
        with torch.no_grad():
            action = model(obs)
        action = action.squeeze().data.cpu().numpy()
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            if reward < 0:
                print('*** FAILED ***')
                time.sleep(0.7)
            obs = env.reset()
            env.render()
def _enjoy():
    """Run the single-frame imitation model in the env, rendering forever.

    Loads the trained model, wraps the env, then loops: predict an action
    from the current observation, step, render, and reset on episode end.
    """
    model = Model(action_dim=2, max_action=1.)
    try:
        state_dict = torch.load('trained_models/imitate.pt', map_location=device)
        model.load_state_dict(state_dict)
    except (OSError, RuntimeError) as e:
        # Narrowed from a bare `except:`: covers a missing/unreadable
        # checkpoint (OSError) and an architecture mismatch (RuntimeError),
        # and reports the actual cause instead of swallowing it.
        print('failed to load model: {}'.format(e))
        exit()
    model.eval().to(device)

    env = launch_env()
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)

    obs = env.reset()

    while True:
        obs = torch.from_numpy(obs).float().to(device).unsqueeze(0)
        # Inference only: no_grad avoids building an unused autograd graph.
        with torch.no_grad():
            action = model(obs)
        action = action.squeeze().data.cpu().numpy()
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            if reward < 0:
                print('*** FAILED ***')
                time.sleep(0.7)
            obs = env.reset()
            env.render()
def _enjoy(args):
    """Roll out the policy named by ``args.policy`` in the wrapped env, forever."""
    env = launch_env()
    print("Initialized environment")

    # Observation pipeline: resize -> grayscale -> normalize -> stack 4 frames.
    for wrapper in (ResizeWrapper, GrayscaleWrapper, NormalizeWrapper):
        env = wrapper(env)
    env = FrameStack(env, 4)
    env = DtRewardWrapper(env)
    env = ActionWrapper(env)
    print("Initialized Wrappers")

    state_dim = env.observation_space.shape
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    # Look up the policy class by name and restore its weights from disk.
    policy = policies[args.policy](state_dim, action_dim, max_action)
    policy.load("reinforcement/pytorch/models/", args.policy)

    obs = env.reset()
    done = False
    # Outer loop restarts a fresh episode each time the inner one terminates.
    while True:
        while not done:
            action = policy.predict(np.array(obs))
            obs, reward, done, _ = env.step(action)
            env.render()
        done = False
        obs = env.reset()
def initWindowModel():
    """Load the window imitation model and a freshly reset, wrapped env.

    Returns:
        (env, model): the wrapped environment (already reset and rendered)
        and the model in eval mode on ``device``.
    """
    model = WindowModel(action_dim=2, max_action=1.)
    try:
        state_dict = torch.load('./models/windowimitate.pt')
        model.load_state_dict(state_dict)
    except (OSError, RuntimeError) as e:
        # Narrowed from a bare `except:`: covers a missing/unreadable
        # checkpoint (OSError) and an architecture mismatch (RuntimeError),
        # and reports the actual cause instead of swallowing it.
        print('failed to load model: {}'.format(e))
        exit()
    model.eval().to(device)

    env = launch_env1()
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)
    env.reset()
    env.render()
    return env, model
def _dagger():
    """DAgger data collection: run the imitation policy while a human expert
    can relabel actions with the arrow keys; SPACE dumps the collected
    (observation, action) history to ``./dagger/`` as ``.npy`` files.

    Fixes over the previous version: removed a leftover debug
    ``raise Exception("asdfsadf")`` that made the entire collection loop
    unreachable, replaced the undefined ``len(count)`` (a NameError) with a
    local save counter so successive dumps do not overwrite each other,
    removed gibberish debug prints, and narrowed the bare ``except:``.
    """
    model = Model(action_dim=2, max_action=1.)
    try:
        state_dict = torch.load('./models/imitate.pt')
        model.load_state_dict(state_dict)
    except (OSError, RuntimeError) as e:
        print('failed to load model: {}'.format(e))
        exit()
    model.eval().to(device)

    env = launch_env1()
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)

    obs = env.reset()
    env.render()

    # Keyboard handler so the human expert can override the policy live.
    key_handler = key.KeyStateHandler()
    env.unwrapped.window.push_handlers(key_handler)
    print(env.map_name)

    obsHistory = []
    actionHistory = []
    save_idx = 0  # suffix for the saved .npy dumps; incremented per save

    while True:
        obs = torch.from_numpy(obs).float().to(device).unsqueeze(0)
        action = model(obs)
        action = action.squeeze().data.cpu().numpy()
        obs, reward, done, info = env.step(action)

        # Expert override: arrow keys provide the DAgger label for this step.
        daggerAction = np.array([0.0, 0.0])
        if key_handler[key.UP]:
            daggerAction = np.array([1.00, 0.0])
        if key_handler[key.DOWN]:
            daggerAction = np.array([-1.00, 0])
        if key_handler[key.LEFT]:
            daggerAction = np.array([0.35, +1])
        if key_handler[key.RIGHT]:
            daggerAction = np.array([0.35, -1])
        if key_handler[key.SPACE]:
            # Dump everything collected so far. Note: saving repeats on every
            # frame while SPACE is held down.
            np.save('./dagger/obs_{}.npy'.format(save_idx), np.array(obsHistory))
            np.save('./dagger/actions_{}.npy'.format(save_idx), np.array(actionHistory))
            save_idx += 1

        print(daggerAction)
        # NOTE(review): obs here is the post-step observation; confirm this is
        # the intended (observation, expert action) pairing for DAgger training.
        obsHistory.append(obs)
        actionHistory.append(daggerAction)

        env.render()
        if done:
            if reward < 0:
                print('*** FAILED ***')
                time.sleep(0.7)
            obs = env.reset()
            env.render()