def play(env, act, stochastic, video_path):
    """Run the trained policy in *env* forever, rendering each frame.

    Records a video of the first episode to *video_path* (recording is
    disabled when *video_path* is None) and prints the score of every
    finished episode.  When the module-level ``args.visual`` flag is set,
    the predicted return distribution is plotted live at every step.

    Parameters
    ----------
    env : environment whose monitor wrapper exposes ``info["rewards"]``
        (one entry per finished episode -- TODO confirm against caller)
    act : callable mapping a batched observation to a batch of actions
    stochastic : bool, forwarded to *act*
    video_path : str or None, target file for the first-episode video

    Note: this function never returns; interrupt the process to stop.
    """
    num_episodes = 0
    # enabled=False makes the recorder a no-op when no path was given
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    if args.visual:
        action_names = distdeepq.actions_from_env(env)
        plot_machine = distdeepq.PlotMachine(dist_params, env.action_space.n, action_names)
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # act expects a batch dimension, hence the [None] indexing
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if args.visual:
            plot_machine.plot_distribution(np.array(obs)[None])
        if done:
            obs = env.reset()
            # the monitor appends one entry per finished episode
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
def play(env, act, stochastic, video_path):
    """Run the trained policy in *env* forever, printing episode scores.

    Records a video of the first episode to *video_path* (recording is
    disabled when *video_path* is None).  After every finished episode
    prints its score and the running average over all episodes so far.

    Parameters
    ----------
    env : environment whose monitor wrapper exposes ``info["rewards"]``
        (one entry per finished episode -- TODO confirm against caller)
    act : callable mapping a batched observation to a batch of actions
    stochastic : bool, forwarded to *act*
    video_path : str or None, target file for the first-episode video

    Note: this function never returns; interrupt the process to stop.
    """
    num_episodes = 0
    # enabled=False makes the recorder a no-op when no path was given
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    episode_scores = []
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # act expects a batch dimension, hence the [None] indexing
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
            # the monitor appends one entry per finished episode
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print("Score in this episode: " + str(info["rewards"][-1]))
                episode_scores.append(info["rewards"][-1])
                num_episodes = len(info["rewards"])
                print("Average Score so far: " + str(sum(episode_scores) / float(num_episodes)))
def play(env, act, stochastic, video_path):
    # Run the trained policy forever, rendering the environment and, when
    # the module-level ``args.show_observation`` flag is set, a live
    # matplotlib view of the newest observation frame.  Records a video of
    # the first episode to video_path (disabled when it is None) and
    # prints each finished episode's score.  Never returns.
    num_episodes = 0
    video_recorder = None  # NOTE(review): dead assignment -- overwritten on the next line
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    if args.show_observation:
        # assumes obs is a LazyFrames-style stack exposing ._frames of
        # 84x84 grayscale frames -- TODO confirm against the env wrappers
        fig = plt.figure()
        im = plt.imshow(obs._frames[-1].reshape((84, 84)), cmap='Greys')
        plt.show(False)  # non-blocking show so the loop below keeps running
    while True:
        env.unwrapped.render()
        if args.show_observation:
            # refresh the observation window with the latest frame
            im.set_data(obs._frames[-1].reshape((84, 84)))
            fig.canvas.draw()
        video_recorder.capture_frame()
        # act expects a batch dimension, hence the [None] indexing
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
            # the monitor appends one entry per finished episode
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
def play(env, act, stochastic, video_path, clipped, num_trials=10):
    """Play *num_trials* episodes and return clipped and raw scores.

    Records a video of the first episode to *video_path* (recording is
    disabled when *video_path* is None).  Per-episode rewards are summed
    locally (optionally clipped via ``clip_score``) while the monitor
    wrapper's own unclipped totals are read from ``info["rewards"]``.

    Parameters
    ----------
    env : environment whose monitor wrapper exposes ``info["rewards"]``
        (one entry per finished episode -- TODO confirm against caller)
    act : callable mapping a batched observation to a batch of actions
    stochastic : bool, forwarded to *act*
    video_path : str or None, target file for the first-episode video
    clipped : bool, clip each step reward with ``clip_score`` when True
    num_trials : int, number of episodes to play (default 10)

    Returns
    -------
    dict with keys "Nonclipped" (monitor's episode totals) and
    "Clipped" (locally accumulated per-episode sums).
    """
    # Bug fix: with num_trials <= 0 the loop never ran and the return
    # statement raised UnboundLocalError on `info`; return empty results.
    if num_trials <= 0:
        return {"Nonclipped": [], "Clipped": []}
    num_episodes = 0
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    reward = 0
    num_played = 0
    reward_array = []
    while num_played < num_trials:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # act expects a batch dimension, hence the [None] indexing
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if clipped:
            rew = clip_score(rew)
        reward += rew
        if done:
            obs = env.reset()
            # the monitor appends one entry per finished episode
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                reward_array.append(reward)
                reward = 0
                num_played += 1
                num_episodes = len(info["rewards"])
    return {"Nonclipped": info["rewards"], "Clipped": reward_array}
def play(
    env,
    act,
    craft_adv_obs,
    craft_adv_obs2,
    stochastic,
    video_path,
    attack,
    m_target,
    m_adv,
):
    """Run the policy forever, optionally under an adversarial attack.

    When *attack* is not None, each observation is perturbed by
    *craft_adv_obs* under the adversary model's session and the action
    taken on the perturbed observation is compared with the clean-
    observation action to measure attack transfer.  Per-episode reward,
    episode count and attack success percentage are printed.

    Parameters
    ----------
    env : environment whose monitor wrapper exposes ``info["rewards"]``
    act : callable mapping a batched observation to a batch of actions
    craft_adv_obs : callable crafting adversarial observations
    craft_adv_obs2 : unused here; kept for caller compatibility
    stochastic : bool, forwarded to *act* / *craft_adv_obs*
    video_path : str or None, target file for the first-episode video
    attack : attack spec or None for a clean run
    m_target, m_adv : models providing TF sessions for target/adversary

    Note: this function never returns; interrupt the process to stop.
    """
    num_episodes = 0
    num_moves = 0
    num_transfer = 0
    # enabled=False makes the recorder a no-op when no path was given
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = craft_adv_obs(np.array(obs)[None], stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    # attack changed the chosen action => transferred
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
            # the monitor appends one entry per finished episode
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print("Reward: " + str(info["rewards"][-1]))
                num_episodes = len(info["rewards"])
                print("Episode: " + str(num_episodes))
                # Bug fix: guard against ZeroDivisionError -- num_moves
                # stays 0 for the whole episode when attack is None.
                if num_moves > 0:
                    success = float(num_transfer / num_moves) * 100.0
                    print("Percentage of successful attacks: " + str(success))
                num_moves = 0
                num_transfer = 0
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack, defense):
    """Run the policy forever with optional attack and foresight defense.

    When *craft_adv_obs* is provided, actions are chosen on adversarially
    perturbed observations.  With ``defense == 'foresight'`` a visual-
    foresight model predicts clean observations from the previous
    observation/action, and once four predicted frames are available the
    action is re-chosen on the predicted stack instead.

    Parameters
    ----------
    env : environment whose monitor wrapper exposes ``info["rewards"]``
    act : callable mapping a batched observation to a batch of actions
    craft_adv_obs : callable crafting adversarial observations, or None
    stochastic : bool, forwarded to *act* / *craft_adv_obs*
    video_path : str or None, target file for the first-episode video
    game_name : str, used to load the visual-foresight model
    attack : attack spec (unused directly here; kept for callers)
    defense : 'foresight' to enable the prediction-based defense

    Note: this function never returns; interrupt the process to stop.
    """
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)
    num_episodes = 0
    # enabled=False makes the recorder a no-op when no path was given
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    t = 0
    obs = env.reset()
    while True:
        #env.unwrapped.render()
        video_recorder.capture_frame()
        # Attack
        if craft_adv_obs is not None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None], stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
        # Defense: only after a few steps so old_obs/old_action exist
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs),
                        game_screen_mean, vf, env.action_space.n, t))
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None], stochastic=stochastic)[0]
        old_obs = obs
        old_action = action
        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            # NOTE(review): pred_obs is not cleared here, so the first
            # defended steps of the next episode may mix in stale frames
            # from the previous one -- confirm whether this is intended.
            t = 0
            obs = env.reset()
            # the monitor appends one entry per finished episode
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path, attack,
         m_target, m_adv):
    """Run the policy forever, optionally under an adversarial attack.

    When *attack* is not None, each observation is perturbed by
    *craft_adv_obs* under the adversary model's session; the action taken
    on the perturbed observation is compared against the clean-observation
    action to measure how often the attack transfers.  Per-episode reward,
    episode count and attack success percentage are printed.

    Parameters
    ----------
    env : environment whose monitor wrapper exposes ``info["rewards"]``
    act : callable mapping a batched observation to a batch of actions
    craft_adv_obs : callable crafting adversarial observations
    craft_adv_obs2 : unused here; kept for caller compatibility
    stochastic : bool, forwarded to *act* / *craft_adv_obs*
    video_path : str or None, target file for the first-episode video
    attack : attack spec or None for a clean run
    m_target, m_adv : models providing TF sessions for target/adversary

    Note: this function never returns; interrupt the process to stop.
    """
    num_episodes = 0
    num_moves = 0
    num_transfer = 0
    # enabled=False makes the recorder a no-op when no path was given
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = \
                    craft_adv_obs(np.array(obs)[None], stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    # attack changed the chosen action => transferred
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
            # the monitor appends one entry per finished episode
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print('Reward: ' + str(info["rewards"][-1]))
                num_episodes = len(info["rewards"])
                print('Episode: ' + str(num_episodes))
                # Bug fix: guard against ZeroDivisionError -- num_moves
                # stays 0 for the whole episode when attack is None.
                if num_moves > 0:
                    success = float(num_transfer / num_moves) * 100.0
                    print("Percentage of successful attacks: " + str(success))
                num_moves = 0
                num_transfer = 0
def play(env, act, stochastic, video_path):
    """Run the trained policy in *env* forever, rendering each frame.

    Records a video of the first episode to *video_path* (recording is
    disabled when *video_path* is None) and prints the score of every
    finished episode.

    Parameters
    ----------
    env : environment whose monitor wrapper exposes ``info["rewards"]``
        (one entry per finished episode -- TODO confirm against caller)
    act : callable mapping a batched observation to a batch of actions
    stochastic : bool, forwarded to *act*
    video_path : str or None, target file for the first-episode video

    Note: this function never returns; interrupt the process to stop.
    """
    num_episodes = 0
    # enabled=False makes the recorder a no-op when no path was given
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # act expects a batch dimension, hence the [None] indexing
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
            # the monitor appends one entry per finished episode
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
# NOTE(review): this fragment starts mid-expression -- the line below is
# the tail of a call (presumably a DataLoader construction over
# dataloader_train's dataset) whose beginning is outside this view.
    batch_size=BATCH_SIZE, shuffle=True)
# Replay recorded action sequences in a "real" and a "sim" environment in
# lockstep, recording both to video files for side-by-side comparison.
match_env(env_real, env_sim)  # presumably synchronizes the two envs' states -- verify
video_recorder = VideoRecorder(env_real, 'real.mp4', enabled=True)
video_recorder2 = VideoRecorder(env_sim, 'sim.mp4', enabled=True)
for i, data in enumerate(dataloader_train):
    # assumes data["actions"] is a tensor of shape (batch, >=50, ...) --
    # TODO confirm against the dataset definition
    for j in range(50):
        env_sim.render()
        env_real.render()
        action = data["actions"][0, j].numpy()
        video_recorder.capture_frame()
        video_recorder2.capture_frame()
        # .copy() guards against the envs mutating the shared array
        obs_real, _, _, _ = env_real.step(action.copy())
        obs_simp, _, _, _ = env_sim.step(action.copy())
    env_real.reset()
    env_sim.reset()
    match_env(env_real, env_sim)
    # stop after 11 batches (i == 10)
    if i == 10:
        break
video_recorder.close()
video_recorder.enabled = False
video_recorder2.close()
video_recorder2.enabled = False