class Runner:
    def __init__(self, args):
        with open(args.src_filepath, "rb") as f:
            model = pickle.load(f)
        env = gym.make(args.env_name)
        self.model = model
        self.env = env
        self.n_iter = args.n_iter
        self.rec_flag = args.rec
        self.recorder = VideoRecorder(env, base_path=args.src_filepath)

    def __call__(self):
        for i in range(self.n_iter):
            score = self.get_score()
            print(score)

    def close(self):
        self.recorder.close()
        self.env.close()

    def get_score(self):
        env = self.env
        obs = env.reset()
        acc = 0
        while True:
            y = self.model(obs)
            action = np.random.choice(len(y), p=F.softmax(y))
            obs, reward, done, info = env.step(action)
            acc += reward
            if self.rec_flag:
                self.recorder.capture_frame()
            if done:
                break
        return acc

def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    X = []
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print("Score in this episode: " + str(info["rewards"][-1]))
            X.append(info["rewards"][-1])
            num_episodes = len(info["rewards"])
            print("Average Score so far: " + str(sum(X) / float(num_episodes)))

def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    if args.show_observation:
        fig = plt.figure()
        im = plt.imshow(obs._frames[-1].reshape((84, 84)), cmap='Greys')
        plt.show(False)
    while True:
        env.unwrapped.render()
        if args.show_observation:
            im.set_data(obs._frames[-1].reshape((84, 84)))
            fig.canvas.draw()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])

def play(env, act, stochastic, video_path, clipped, num_trials=10):
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    reward = 0
    num_played = 0
    rewardArray = []
    while num_played < num_trials:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if clipped:
            rew = clip_score(rew)
        reward += rew
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            rewardArray.append(reward)
            reward = 0
            num_played += 1
            num_episodes = len(info["rewards"])
    return {"Nonclipped": info["rewards"], "Clipped": rewardArray}

def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    if args.visual:
        action_names = distdeepq.actions_from_env(env)
        plot_machine = distdeepq.PlotMachine(dist_params, env.action_space.n, action_names)
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if args.visual:
            plot_machine.plot_distribution(np.array(obs)[None])
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])

def main():
    args = parse_args()
    env = make_atari(args.env)
    env = wrap_deepmind(env)

    # setup the model to process actions for one environment and one step at a time
    model = acktr_disc.Model(policies.CnnPolicy, env.observation_space, env.action_space, 1, 1)
    # load the trainable parameters from our trained file
    model.load(args.model_path)

    # keep track of the last 4 frames of observations
    env_width = env.observation_space.shape[0]
    env_height = env.observation_space.shape[1]
    obs_history = np.zeros((1, env_width, env_height, 4), dtype=np.uint8)

    # if we're supposed to show how the model sees the game
    if args.show_observation:
        obs = env.reset()
        import pygame
        from pygame import surfarray
        # the default size is too small, scale it up
        scale_factor = args.scale_factor
        screen = pygame.display.set_mode((env_width * scale_factor, env_height * scale_factor), 0, 8)
        # setup a gray palette
        pygame.display.set_palette(tuple([(i, i, i) for i in range(256)]))

    # if we're supposed to record video
    video_path = args.video_path
    if video_path is not None:
        video_recorder = VideoRecorder(env, base_path=video_path, enabled=video_path is not None)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            if args.show_observation:
                # use the Kronecker product to scale the array up for display, and also transpose
                # x/y axes because pygame displays as column/row instead of gym's row/column
                transposed = obs_history[0, :, :, -1].transpose((1, 0))
                scaled_array = np.uint8(np.kron(transposed, np.ones((scale_factor, scale_factor))))
                surfarray.blit_array(screen, scaled_array)
                pygame.display.flip()
            if video_path is not None:
                video_recorder.capture_frame()
            # add the current observation onto our history list
            obs_history = np.roll(obs_history, shift=-1, axis=3)
            obs_history[:, :, :, -1] = obs[None][:, :, :, 0]
            # get the suggested action for the current observation history
            action, v, _ = model.step(obs_history)
            obs, rew, done, info = env.step(action)
            episode_rew += rew
        print("Episode reward", episode_rew)
        # if we're taking video, stop it now and clear video_path so no more frames are added
        # once we're out of lives or there are no lives in this game
        if video_path is not None and ('ale.lives' not in info or info['ale.lives'] == 0):
            video_path = None
            video_recorder.close()

def test_record_breaking_render_method():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.capture_frame()
    rec.close()
    assert rec.empty
    assert rec.broken
    assert not os.path.exists(rec.path)

def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path,
         attack, m_target, m_adv):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = craft_adv_obs(np.array(obs)[None], stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
            num_moves += 1
            if action != action2:
                num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print("Reward: " + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print("Episode: " + str(num_episodes))
            success = float(num_transfer / num_moves) * 100.0
            print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0

def test_text_envs():
    env = gym.make('FrozenLake-v0')
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)

def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack, defense):
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    t = 0
    obs = env.reset()
    while True:
        # env.unwrapped.render()
        video_recorder.capture_frame()

        # Attack
        if craft_adv_obs is not None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None], stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs),
                        game_screen_mean, vf, env.action_space.n, t))
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None], stochastic=stochastic)[0]

        old_obs = obs
        old_action = action

        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            t = 0
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])

def test_record_simple():
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    f = open(rec.path)
    assert os.fstat(f.fileno()).st_size > 100

def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path,
         attack, m_target, m_adv):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = craft_adv_obs(np.array(obs)[None], stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
            num_moves += 1
            if action != action2:
                num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print('Reward: ' + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print('Episode: ' + str(num_episodes))
            success = float(num_transfer / num_moves) * 100.0
            print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0

def render(env, record=False):
    if record:
        rec = VideoRecorder(env)
    else:
        rec = None
    mean_reward = 0.0
    mean_traj_reward = 0.0
    max_run_time = 0.0
    min_run_time = 1e+10
    mean_run_time = 0.0
    for i in range(5):
        total_reward = 0.0
        traj_total_reward = 0.0
        idx = 0
        done = False
        obs = env.reset()
        while not done:
            env.render()
            x = np.reshape(obs, [1, -1])
            pred = rl_model.run(x, None)
            action = np.argmax(pred)
            obs, rew, done, info = env.step(action)
            total_reward += rew
            traj_total_reward += hc_model.predict(obs.reshape([1, -1]))
            if rec is not None:
                rec.capture_frame()
            idx += 1
            if done or idx > 300:
                if idx > max_run_time:
                    max_run_time = idx
                elif idx < min_run_time:
                    min_run_time = idx
                mean_run_time += idx
                mean_reward += total_reward
                mean_traj_reward += traj_total_reward
                break
    if rec is not None:
        rec.close()
    print("[ RunLength =", 5, " MeanReward =", mean_reward / 5.0,
          "MeanTrajReward =", mean_traj_reward / 5.0,
          " MeanRunTime =", mean_run_time / 5.0,
          " MaxRunTime =", max_run_time, " MinRunTime =", min_run_time, "]")

def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])

dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)

match_env(env_real, env_sim)
video_recorder = VideoRecorder(env_real, 'real.mp4', enabled=True)
video_recorder2 = VideoRecorder(env_sim, 'sim.mp4', enabled=True)

for i, data in enumerate(dataloader_train):
    for j in range(50):
        env_sim.render()
        env_real.render()
        action = data["actions"][0, j].numpy()
        video_recorder.capture_frame()
        video_recorder2.capture_frame()
        obs_real, _, _, _ = env_real.step(action.copy())
        obs_simp, _, _, _ = env_sim.step(action.copy())
    env_real.reset()
    env_sim.reset()
    match_env(env_real, env_sim)
    if i == 10:
        break

video_recorder.close()
video_recorder.enabled = False
video_recorder2.close()
video_recorder2.enabled = False