Example #1
def play(env, act, stochastic, video_path, clipped, num_trials=10):
    num_episodes = 0
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    reward = 0
    num_played = 0
    rewardArray = []
    while num_played < num_trials:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if clipped:
            rew = clip_score(rew)
        reward += rew
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:  # "rewards" is presumably filled in by the env's monitoring wrapper
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            rewardArray.append(reward)
            reward = 0
            num_played += 1
            num_episodes = len(info["rewards"])
    return {"Nonclipped": info["rewards"], "Clipped": rewardArray}
Example #2
class Runner:
    def __init__(self, args):
        with open(args.src_filepath, "rb") as f:
            model = pickle.load(f)
        env = gym.make(args.env_name)
        self.model = model
        self.env = env
        self.n_iter = args.n_iter
        self.rec_flag = args.rec
        self.recorder = VideoRecorder(env, base_path=args.src_filepath)

    def __call__(self):
        for i in range(self.n_iter):
            score = self.get_score()
            print(score)

    def close(self):
        self.recorder.close()
        self.env.close()

    def get_score(self):
        env = self.env
        obs = env.reset()
        acc = 0
        while True:
            y = self.model(obs)
            action = np.random.choice(len(y), p=F.softmax(y))
            obs, reward, done, info = env.step(action)
            acc += reward
            if self.rec_flag:
                self.recorder.capture_frame()
            if done:
                break
        return acc
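A plausible way to drive this Runner (a sketch; the flag names mirror the attributes read in __init__ and are otherwise assumptions):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--src_filepath", required=True)   # pickled model file
parser.add_argument("--env_name", default="CartPole-v1")
parser.add_argument("--n_iter", type=int, default=5)
parser.add_argument("--rec", action="store_true")      # record video
args = parser.parse_args()

runner = Runner(args)
try:
    runner()           # prints one episode score per iteration
finally:
    runner.close()     # close the recorder and the env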
Example #3
def test_no_frames():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.close()
    assert rec.empty
    assert rec.functional
    assert not os.path.exists(rec.path)
Example #4
def play(env, act, stochastic, video_path, nb_atoms):
    num_episodes = 0
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    if args.visual:  # "args" is a module-level argparse namespace here, not a parameter
        action_names = dqn_core.actions_from_env(env)
        plot_machine = PlotMachine(nb_atoms, env.action_space.n, action_names)
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], args.alpha, stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if args.visual:
            plot_machine.plot_distribution(np.array(obs)[None])

        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #5
def main():
    args = parse_args()
    
    env = make_atari(args.env)
    env = wrap_deepmind(env)
    
    # setup the model to process actions for one environment and one step at a time
    model = acktr_disc.Model(policies.CnnPolicy, env.observation_space, env.action_space, 1, 1)
    # load the trainable parameters from our trained file
    model.load(args.model_path)
    # keep track of the last 4 frames of observations
    env_width = env.observation_space.shape[0]
    env_height = env.observation_space.shape[1]
    obs_history = np.zeros((1, env_width, env_height, 4), dtype=np.uint8)

    # if we're supposed to show how the model sees the game
    if args.show_observation:
        obs = env.reset()
        import pygame
        from pygame import surfarray
        # the default size is too small, scale it up
        scale_factor = args.scale_factor
        screen = pygame.display.set_mode((env_width*scale_factor, env_height*scale_factor), 0, 8)
        # setup a gray palette
        pygame.display.set_palette(tuple([(i, i, i) for i in range(256)]))
        
    # if we're supposed to record video
    video_path = args.video_path
    if video_path is not None:
        video_recorder = VideoRecorder(
            env, base_path=video_path, enabled=video_path is not None)
        
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            if args.show_observation:
                # use the Kronecker product to scale the array up for display, and also transpose x/y axes because pygame
                # displays as column/row instead of gym's row/column
                transposed = obs_history[0,:,:,-1].transpose((1,0))
                scaled_array = np.uint8(np.kron(transposed, np.ones((scale_factor, scale_factor))))
                surfarray.blit_array(screen, scaled_array)
                pygame.display.flip()
            if video_path is not None:
                video_recorder.capture_frame()
            # add the current observation onto our history list
            obs_history = np.roll(obs_history, shift=-1, axis=3)
            obs_history[:, :, :, -1] = obs[None][:, :, :, 0]
            # get the suggested action for the current observation history
            action, v, _ = model.step(obs_history)
            
            obs, rew, done, info = env.step(action)
            episode_rew += rew
        print("Episode reward", episode_rew)
        # if we're recording, stop the video now and clear video_path so no more
        # frames are captured (we're out of lives, or the game has no lives counter)
        if video_path is not None and ('ale.lives' not in info or info['ale.lives'] == 0):
            video_path = None
            video_recorder.close()
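The frame stacking in main() relies on np.roll to shift the 4-frame history buffer; a toy illustration of that step (2-D instead of the 4-D buffer above):

import numpy as np

hist = np.array([[1, 2, 3, 4]])          # 4-slot history of scalar "frames"
hist = np.roll(hist, shift=-1, axis=1)   # drop the oldest, free the last slot
hist[:, -1] = 5                          # write the newest frame
print(hist)                              # [[2 3 4 5]]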
Example #6
def test_record_breaking_render_method():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.capture_frame()
    rec.close()
    assert rec.empty
    assert rec.broken
    assert not os.path.exists(rec.path)
Example #8
def test_text_envs():
    env = gym.make('FrozenLake-v0')
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)
Example #9
def play(
    env,
    act,
    craft_adv_obs,
    craft_adv_obs2,
    stochastic,
    video_path,
    attack,
    m_target,
    m_adv,
):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0

    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()

        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = craft_adv_obs(np.array(obs)[None],
                                        stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()

        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print("Reward: " + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print("Episode: " + str(num_episodes))
            success = float(num_transfer / num_moves) * 100.0
            print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0
Example #11
def test_record_simple():
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    with open(rec.path, "rb") as f:  # binary mode; close the handle when done
        assert os.fstat(f.fileno()).st_size > 100
Example #12
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack,
         defense):
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)

    num_episodes = 0
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)

    t = 0
    obs = env.reset()
    while True:
        #env.unwrapped.render()
        video_recorder.capture_frame()

        # Attack
        if craft_adv_obs is not None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None],
                                    stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs),
                        game_screen_mean, vf, env.action_space.n, t))
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None],
                             stochastic=stochastic)[0]

        old_obs = obs
        old_action = action

        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            t = 0
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
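The foresight defense above keeps the last four predicted frames in a bounded deque and only acts on them once the buffer is full; a toy illustration of the buffering and stacking (the 84x84 frame size is an assumption typical of Atari preprocessing):

import numpy as np
from collections import deque

pred_obs = deque(maxlen=4)                # oldest frames fall out automatically
for t in range(6):
    pred_obs.append(np.full((84, 84), t))
stacked = np.stack(pred_obs, axis=2)      # shape (84, 84, 4), frames t=2..5
print(stacked.shape, stacked[0, 0])       # (84, 84, 4) [2 3 4 5]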
Example #14
def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path,
         attack, m_target, m_adv):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0

    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()

        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = \
                    craft_adv_obs(np.array(obs)[None],
                                  stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None],
                             stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()

        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print('Reward: ' + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print('Episode: ' + str(num_episodes))
            success = float(num_transfer / num_moves) * 100.0
            print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0
Example #15
def render(env, record=False):
    if record:
        rec = VideoRecorder(env)
    else:
        rec = None

    mean_reward = 0.0
    mean_traj_reward = 0.0
    max_run_time = 0.0
    min_run_time = 1e+10
    mean_run_time = 0.0
    for i in range(5):
        total_reward = 0.0
        traj_total_reward = 0.0

        idx = 0
        done = False
        obs = env.reset()
        while not done:
            env.render()
            x = np.reshape(obs, [1, -1])
            pred = rl_model.run(x, None)
            action = np.argmax(pred)

            obs, reward, done, info = env.step(action)
            total_reward += reward
            traj_total_reward += hc_model.predict(obs.reshape([1, -1]))

            if rec is not None:
                rec.capture_frame()

            idx += 1
            if done or idx > 300:

                if idx > max_run_time:
                    max_run_time = idx
                elif idx < min_run_time:
                    min_run_time = idx
                mean_run_time += idx

                mean_reward += total_reward
                mean_traj_reward += traj_total_reward
                break
    if rec is not None:
        rec.close()
    print "[ RunLength =",5," MeanReward =",mean_reward / 5.0, "MeantrajReward =",mean_traj_reward/5.0,\
      " MeanRunTime =",mean_run_time / 5.0, " MaxRunTime =",max_run_time," MinRunTime =",min_run_time,"]"
Example #16
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #18
def test_record_unrecordable_method():
    env = UnrecordableEnv()
    rec = VideoRecorder(env)
    assert not rec.enabled
    rec.close()
Example #19
                              batch_size=BATCH_SIZE,
                              shuffle=True)

match_env(env_real, env_sim)
video_recorder = VideoRecorder(env_real, 'real.mp4', enabled=True)
video_recorder2 = VideoRecorder(env_sim, 'sim.mp4', enabled=True)

for i, data in enumerate(dataloader_train):
    for j in range(50):
        env_sim.render()
        env_real.render()

        action = data["actions"][0, j].numpy()

        video_recorder.capture_frame()
        video_recorder2.capture_frame()

        obs_real, _, _, _ = env_real.step(action.copy())
        obs_sim, _, _, _ = env_sim.step(action.copy())

    env_real.reset()
    env_sim.reset()
    match_env(env_real, env_sim)
    if i == 10:
        break

video_recorder.close()
video_recorder.enabled = False
video_recorder2.close()
video_recorder2.enabled = False
Example #20
                utils.to_bin(pole_angle, pole_angle_bins),
                utils.to_bin(angle_rate_of_change, angle_rate_bins)
            ])
            if not done:
                #qlearn.learn(state, action, reward, nextState)
                state = nextState
                rewards_sum += reward
            else:
                # Q-learn stuff
                reward = -200
                rewards_sum += reward
                break

        print(rewards_sum)
        test_reward.append(rewards_sum)
        record.close()

    print("Test Meaning_value: {}".format(np.mean(test_reward)))
    print("Test std: {}".format(np.std(test_reward)))
    # print("length: ")
    print("Test finished...")

    x = np.asarray(x, dtype=int)
    reward_y = np.asarray(reward_y, dtype=float)

    plt.figure()
    plt.plot(x, reward_y)
    plt.savefig('cartpole.png')
    plt.show()

    trajectory = last_time_steps.tolist()
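The fragment above discretizes continuous CartPole observations with a utils.to_bin helper that is not shown; its assumed behavior is a simple bin lookup along these lines:

import numpy as np

def to_bin(value, bins):
    # index of the bin that value falls into (assumed behavior of utils.to_bin)
    return int(np.digitize(value, bins))

pole_angle_bins = np.linspace(-0.2, 0.2, 9)
print(to_bin(0.05, pole_angle_bins))  # 6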
Example #21
class VideoWrapper(Wrapper):
    """Creates videos from wrapped environment by called render after each timestep."""
    def __init__(self, env, directory, single_video=True):
        """

        :param env: (gym.Env) the wrapped environment.
        :param directory: (str) the output directory.
        :param single_video: (bool) if True, generates a single video file, with episodes
                             concatenated. If False, a new video file is created for each episode.
                             Usually a single video file is what is desired. However, if one is
                             searching for an interesting episode (perhaps by looking at the
                             metadata), saving to different files can be useful.
        """
        super(VideoWrapper, self).__init__(env)
        self.episode_id = 0
        self.video_recorder = None
        self.single_video = single_video

        self.directory = osp.abspath(directory)

        # Make sure to not put multiple different runs in the same directory,
        # if the directory already exists
        error_msg = "You're trying to use the same directory twice, " \
                    "this would result in files being overwritten"
        assert not os.path.exists(self.directory), error_msg
        os.makedirs(self.directory, exist_ok=True)

    def _step(self, action):
        obs, rew, done, info = self.env.step(action)
        if done:
            winners = [i for i, d in info.items() if 'winner' in d]
            metadata = {'winners': winners}
            self.video_recorder.metadata.update(metadata)
        self.video_recorder.capture_frame()
        return obs, rew, done, info

    def _reset(self):
        self._reset_video_recorder()
        self.episode_id += 1
        return self.env.reset()

    def _reset_video_recorder(self):
        """Called at the start of each episode (by _reset). Always creates a video recorder
           if one does not already exist. When a video recorder is already present, it will only
           create a new one if `self.single_video == False`."""
        if self.video_recorder is not None:
            # Video recorder already started.
            if not self.single_video:
                # We want a new video for each episode, so destroy current recorder.
                self.video_recorder.close()
                self.video_recorder = None

        if self.video_recorder is None:
            # No video recorder -- start a new one.
            self.video_recorder = VideoRecorder(
                env=self.env,
                base_path=osp.join(self.directory,
                                   'video.{:06}'.format(self.episode_id)),
                metadata={'episode_id': self.episode_id},
            )

    def _close(self):
        if self.video_recorder is not None:
            self.video_recorder.close()
            self.video_recorder = None
        super(VideoWrapper, self)._close()
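As a usage illustration, the wrapper could be driven like this (a minimal sketch: the environment id and directory are assumptions, and _step/_reset correspond to the old gym Wrapper API, where step()/reset() dispatch to them):

import gym

# the directory must not already exist (see the assert in __init__)
env = VideoWrapper(gym.make("CartPole-v1"),
                   directory="videos/run-0",
                   single_video=True)
obs = env.reset()          # starts the recorder for episode 0
done = False
while not done:
    obs, rew, done, info = env.step(env.action_space.sample())
env.close()                # finalizes the video file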