def test_no_frames():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.close()
    assert rec.empty
    assert rec.functional
    assert not os.path.exists(rec.path)
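Most of the examples on this page follow the same VideoRecorder lifecycle: build the recorder around an environment, reset the environment, call capture_frame() once per step, and close the recorder to flush the file. A minimal sketch of that pattern, assuming CartPole-v1 and an illustrative output path (the import below is the gym.wrappers.monitoring location used by recent gym releases; older releases expose the class elsewhere):

import gym
from gym.wrappers.monitoring.video_recorder import VideoRecorder

env = gym.make("CartPole-v1")
rec = VideoRecorder(env, base_path="/tmp/cartpole_demo")  # output path is illustrative
env.reset()
done = False
while not done:
    rec.capture_frame()                                   # grab the current rendered frame
    obs, reward, done, info = env.step(env.action_space.sample())
rec.close()                                               # writes and finalizes the video file
env.close()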
Example #3
class Runner:
    def __init__(self, args):
        with open(args.src_filepath, "rb") as f:
            model = pickle.load(f)
        env = gym.make(args.env_name)
        self.model = model
        self.env = env
        self.n_iter = args.n_iter
        self.rec_flag = args.rec
        self.recorder = VideoRecorder(env, base_path=args.src_filepath)

    def __call__(self):
        for i in range(self.n_iter):
            score = self.get_score()
            print(score)

    def close(self):
        self.recorder.close()
        self.env.close()

    def get_score(self):
        env = self.env
        obs = env.reset()
        acc = 0
        while True:
            y = self.model(obs)
            action = np.random.choice(len(y), p=F.softmax(y))
            obs, reward, done, info = env.step(action)
            acc += reward
            if self.rec_flag:
                self.recorder.capture_frame()
            if done:
                break
        return acc
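The Runner in Example #3 expects an argparse-style namespace; a minimal driver sketch, assuming flag names that match the attributes the class reads (src_filepath, env_name, n_iter, rec) and a model pickled beforehand:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--src_filepath", required=True)   # pickled model; also used as the video base_path
parser.add_argument("--env_name", default="CartPole-v1")
parser.add_argument("--n_iter", type=int, default=10)
parser.add_argument("--rec", action="store_true")       # capture a frame on every step while scoring
args = parser.parse_args()

runner = Runner(args)
runner()        # prints one score per iteration
runner.close()  # closes the recorder and the environment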
Example #4
def test_record_breaking_render_method():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.capture_frame()
    rec.close()
    assert rec.empty
    assert rec.broken
    assert not os.path.exists(rec.path)
Example #5
def test_text_envs():
    env = gym.make('FrozenLake-v0')
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)
Example #7
def test_record_simple():
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    f = open(rec.path)
    assert os.fstat(f.fileno()).st_size > 100
Example #8
    def sample(self, horizon, policy, record_fname=None):
        """Samples a rollout from the agent.

        Arguments: 
            horizon: (int) The length of the rollout to generate from the agent.
            policy: (policy) The policy that the agent will use for actions.
            record_fname: (str/None) The name of the file to which a recording of the rollout
                will be saved. If None, the rollout will not be recorded.

        Returns: (dict) A dictionary containing data from the rollout.
            The keys of the dictionary are 'obs', 'ac', 'reward_sum', 'rewards', and 'plan_hor'.
        """
        video_record = record_fname is not None
        recorder = None if not video_record else VideoRecorder(
            self.env, record_fname)

        times, rewards = [], []
        O, A, plan_hors, reward_sum, done = [self.env.reset()
                                             ], [], [], 0, False

        policy.reset()
        for t in range(horizon):
            if video_record:
                recorder.capture_frame()
            start = time.time()
            a, plan_hor = policy.act(O[t], t)
            A.append(a)
            plan_hors.append(plan_hor)
            times.append(time.time() - start)

            if self.noise_stddev is None:
                obs, reward, done, info = self.env.step(A[t])
            else:
                action = A[t] + np.random.normal(
                    loc=0, scale=self.noise_stddev, size=[self.dU])
                action = np.minimum(
                    np.maximum(action, self.env.action_space.low),
                    self.env.action_space.high)
                obs, reward, done, info = self.env.step(action)
            O.append(obs)
            reward_sum += reward
            rewards.append(reward)
            if done:
                break

        if video_record:
            recorder.capture_frame()
            recorder.close()

        print("Average action selection time: ", np.mean(times))
        print("Rollout length: ", len(A))

        return {
            "obs": np.array(O),
            "ac": np.array(A),
            "reward_sum": reward_sum,
            "rewards": np.array(rewards),
            "plan_hor": np.array(plan_hors)
        }
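A call into this method might look like the snippet below; the agent, policy, horizon, and output filename are placeholders for whatever the surrounding experiment script provides:

rollout = agent.sample(horizon=200, policy=policy, record_fname="rollout.mp4")
print(rollout["reward_sum"], rollout["obs"].shape, rollout["plan_hor"].shape)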
Example #9
def play(env, act, stochastic, video_path, clipped, num_trials=10):
    num_episodes = 0
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    reward = 0
    num_played = 0
    rewardArray = []
    while num_played < num_trials:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if clipped:
            rew = clip_score(rew)
        reward += rew
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            rewardArray.append(reward)
            reward = 0
            num_played += 1
            num_episodes = len(info["rewards"])
    return {"Nonclipped": info["rewards"], "Clipped": rewardArray}
Example #10
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    if args.show_observation:
        fig = plt.figure()
        im = plt.imshow(obs._frames[-1].reshape((84, 84)), cmap='Greys')
        plt.show(block=False)
    while True:
        env.unwrapped.render()
        if args.show_observation:
            im.set_data(obs._frames[-1].reshape((84, 84)))
            fig.canvas.draw()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #11
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    if args.visual:
        action_names = distdeepq.actions_from_env(env)
        plot_machine = distdeepq.PlotMachine(dist_params, env.action_space.n,
                                             action_names)
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if args.visual:
            plot_machine.plot_distribution(np.array(obs)[None])

        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #12
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    X = []
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print("Score in this episode: " + str(info["rewards"][-1]))
            X.append(info["rewards"][-1])
            num_episodes = len(info["rewards"])
            print("Average Score so far: " + str(sum(X) / float(num_episodes)))
Example #13
    def _reset_video_recorder(self):
        """Called at the start of each episode (by _reset). Always creates a video recorder
           if one does not already exist. When a video recorder is already present, it will only
           create a new one if `self.single_video == False`."""
        if self.video_recorder is not None:
            # Video recorder already started.
            if not self.single_video:
                # We want a new video for each episode, so destroy current recorder.
                self.video_recorder.close()
                self.video_recorder = None

        if self.video_recorder is None:
            # No video recorder -- start a new one.
            self.video_recorder = VideoRecorder(
                env=self.env,
                base_path=osp.join(self.directory,
                                   'video.{:06}'.format(self.episode_id)),
                metadata={'episode_id': self.episode_id},
            )
Example #14
def play(
    env,
    act,
    craft_adv_obs,
    craft_adv_obs2,
    stochastic,
    video_path,
    attack,
    m_target,
    m_adv,
):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0

    video_recorder = None
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()

        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = craft_adv_obs(np.array(obs)[None],
                                        stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()

        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print("Reward: " + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print("Episode: " + str(num_episodes))
            if num_moves > 0:  # avoid division by zero when no attack was run
                success = float(num_transfer) / num_moves * 100.0
                print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0
Example #15
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack,
         defense):
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)

    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)

    t = 0
    obs = env.reset()
    while True:
        #env.unwrapped.render()
        video_recorder.capture_frame()

        # Attack
        if craft_adv_obs != None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None],
                                    stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs),
                        game_screen_mean, vf, env.action_space.n, t))
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None],
                             stochastic=stochastic)[0]

        old_obs = obs
        old_action = action

        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            t = 0
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #16
def main():
    args = parse_args()
    
    env = make_atari(args.env)
    env = wrap_deepmind(env)
    
    # setup the model to process actions for one environment and one step at a time
    model = acktr_disc.Model(policies.CnnPolicy, env.observation_space, env.action_space, 1, 1)
    # load the trainable parameters from our trained file
    model.load(args.model_path)
    # keep track of the last 4 frames of observations
    env_width = env.observation_space.shape[0]
    env_height = env.observation_space.shape[1]
    obs_history = np.zeros((1, env_width, env_height, 4), dtype=np.uint8)

    # if we're supposed to show how the model sees the game
    if args.show_observation:
        obs = env.reset()
        import pygame
        from pygame import surfarray
        # the default size is too small, scale it up
        scale_factor = args.scale_factor
        screen = pygame.display.set_mode((env_width*scale_factor, env_height*scale_factor), 0, 8)
        # setup a gray palette
        pygame.display.set_palette(tuple([(i, i, i) for i in range(256)]))
        
    # if we're supposed to record video
    video_path = args.video_path
    if video_path is not None:
        video_recorder = VideoRecorder(
            env, base_path=video_path, enabled=video_path is not None)
        
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            if args.show_observation:
                # use the Kronecker product to scale the array up for display, and also transpose x/y axes because pygame
                # displays as column/row instead of gym's row/column
                transposed = obs_history[0,:,:,-1].transpose((1,0))
                scaled_array = np.uint8(np.kron(transposed, np.ones((scale_factor, scale_factor))))
                surfarray.blit_array(screen, scaled_array)
                pygame.display.flip()
            if video_path is not None:
                video_recorder.capture_frame()
            # add the current observation onto our history list
            obs_history = np.roll(obs_history, shift=-1, axis=3)
            obs_history[:, :, :, -1] = obs[None][:, :, :, 0]
            # get the suggested action for the current observation history
            action, v, _ = model.step(obs_history)
            
            obs, rew, done, info = env.step(action)
            episode_rew += rew
        print("Episode reward", episode_rew)
        # if we're recording video, stop now and clear video_path so no more frames are added
        # once we're out of lives (or the game has no lives)
        if video_path is not None and ('ale.lives' not in info or info['ale.lives'] == 0):
            video_path = None
            video_recorder.close()
Example #19
    def sample(self, horizon, policy, record_fname=None):
        """Samples a rollout from the agent.
        Arguments:
            horizon: (int) The length of the rollout to generate from the agent.
            policy: (policy) The policy that the agent will use for actions.
            record_fname: (str/None) The name of the file to which a recording of the rollout
                will be saved. If None, the rollout will not be recorded.
        Returns: (dict) A dictionary containing data from the rollout.
            The keys of the dictionary are 'obs', 'ac', 'reward_sum', and 'rewards'.
        """
        # default to be False
        video_record = record_fname is not None
        recorder = None if not video_record else VideoRecorder(
            self.env, record_fname)

        times, rewards = [], []
        O, A, reward_sum, done = [self.env.reset()], [], 0, False

        policy.reset()

        for t in range(horizon):
            if video_record:
                recorder.capture_frame()
            start = time.time()
            A.append(policy.act(O[t], t))
            # print(O[t].shape, A[t].shape)
            times.append(time.time() - start)
            obs, reward, done, info = self.env.step(A[t])

            O.append(obs)
            reward_sum += reward
            rewards.append(reward)
            if done:
                break

        if video_record:
            recorder.capture_frame()
            recorder.close()

        print("Average action selection time: ", np.mean(times))
        print("Rollout length: ", len(A))

        return {
            "obs": np.array(O),
            "ac": np.array(A),
            "reward_sum": reward_sum,
            "rewards": np.array(rewards),
        }
Example #21
def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path,
         attack, m_target, m_adv):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0

    video_recorder = None
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()

        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = \
                    craft_adv_obs(np.array(obs)[None],
                                  stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None],
                             stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()

        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print('Reward: ' + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print('Episode: ' + str(num_episodes))
            if num_moves > 0:  # avoid division by zero when no attack was run
                success = float(num_transfer) / num_moves * 100.0
                print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0
Example #22
def render(env, record=False):
    if record:
        rec = VideoRecorder(env)
    else:
        rec = None

    mean_reward = 0.0
    mean_traj_reward = 0.0
    max_run_time = 0.0
    min_run_time = 1e+10
    mean_run_time = 0.0
    for i in range(5):
        total_reward = 0.0
        traj_total_reward = 0.0

        idx = 0
        done = False
        obs = env.reset()
        while done == False:
            env.render()
            x = np.reshape(obs, [1, -1])
            pred = rl_model.run(x, None)
            action = np.argmax(pred)

            obs, _, done, info = env.step(action)
            total_reward += _
            traj_total_reward += hc_model.predict(obs.reshape([1, -1]))

            if rec != None:
                rec.capture_frame()

            idx += 1
            if done or idx > 300:

                if idx > max_run_time:
                    max_run_time = idx
                elif idx < min_run_time:
                    min_run_time = idx
                mean_run_time += idx

                mean_reward += total_reward
                mean_traj_reward += traj_total_reward
                break
    if rec != None:
        rec.close()
    print "[ RunLength =",5," MeanReward =",mean_reward / 5.0, "MeantrajReward =",mean_traj_reward/5.0,\
      " MeanRunTime =",mean_run_time / 5.0, " MaxRunTime =",max_run_time," MinRunTime =",min_run_time,"]"
Example #23
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example #24
    def sample(self, horizon, policy, record_fname=None, test_policy=False, average=False):
        """Samples a rollout from the agent.

        Arguments:
            horizon: (int) The length of the rollout to generate from the agent.
            policy: (policy) The policy that the agent will use for actions.
            record_fname: (str/None) The name of the file to which a recording of the rollout
                will be saved. If None, the rollout will not be recorded.
            test_policy: (bool) Passed through to policy.act(); when True, the policy is queried in test mode.
            average: (bool) Passed through to policy.act() together with test_policy.

        Returns: (dict) A dictionary containing data from the rollout.
            The keys of the dictionary are 'obs', 'ac', and 'reward_sum'.
        """
        if test_policy:
            logger.info('Testing the policy')
        video_record = record_fname is not None
        recorder = None if not video_record else VideoRecorder(self.env, record_fname)

        times, rewards = [], []
        O, A, reward_sum, done = [self.env.reset()], [], 0, False
        self._debug += 1

        policy.reset()
        # for t in range(20):
        for t in range(horizon):
            if hasattr(self.env, 'render_imitation'):
                self.env.render_imitation()
            if t % 50 == 10 and t > 1:
                logger.info('Current timesteps: %d / %d, average time: %.5f'
                            % (t, horizon, np.mean(times)))
            if video_record:
                recorder.capture_frame()
            start = time.time()
            if test_policy:
                A.append(policy.act(O[t], t, test_policy=test_policy, average=average))
            else:
                A.append(policy.act(O[t], t))
            times.append(time.time() - start)

            if self.noise_stddev is None:
                obs, reward, done, info = self.env.step(A[t])
            else:
                action = A[t] + np.random.normal(loc=0, scale=self.noise_stddev,
                                                 size=[self.dU])
                action = np.minimum(np.maximum(action,
                                               self.env.action_space.low),
                                    self.env.action_space.high)
                obs, reward, done, info = self.env.step(action)
            O.append(obs)
            reward_sum += reward
            rewards.append(reward)
            if done:
                break

        if video_record:
            recorder.capture_frame()
            recorder.close()

        logger.info("Average action selection time: %.4f" % np.mean(times))
        logger.info("Rollout length: %d" % len(A))

        return {
            "obs": np.array(O),
            "ac": np.array(A),
            "reward_sum": reward_sum,
            "rewards": np.array(rewards),
        }
Example #25
def test_record_unrecordable_method():
    env = UnrecordableEnv()
    rec = VideoRecorder(env)
    assert not rec.enabled
    rec.close()
Example #26
    test_reward = []

    for i_episode in range(200):
        observation = env.reset()

        rewards_sum = 0

        cart_position, cart_velocity, pole_angle, angle_rate_of_change = observation
        state = utils.build_state([
            utils.to_bin(cart_position, cart_position_bins),
            utils.to_bin(cart_velocity, cart_velocity_bins),
            utils.to_bin(pole_angle, pole_angle_bins),
            utils.to_bin(angle_rate_of_change, angle_rate_bins)
        ])

        record = VideoRecorder(env=env, path="cartpolev0.mp4")
        for t in range(max_number_of_steps):
            #env.render()

            # Pick an action based on the current state
            action = 0 if qlearn.getQ(state, 0) > qlearn.getQ(state, 1) else 1
            # Execute the action and get feedback
            observation, reward, done, info = env.step(action)

            # Digitize the observation to get a state
            cart_position, cart_velocity, pole_angle, angle_rate_of_change = observation
            nextState = utils.build_state([
                utils.to_bin(cart_position, cart_position_bins),
                utils.to_bin(cart_velocity, cart_velocity_bins),
                utils.to_bin(pole_angle, pole_angle_bins),
                utils.to_bin(angle_rate_of_change, angle_rate_bins)
Example #28
            return actual_model(img_and_gaze_combined, num_actions, scope,
                                **kwargs)

        act = DeepqWithGaze.build_act(make_obs_ph=lambda name: U.Uint8Input(
            env.observation_space.shape, name=name),
                                      q_func=model_wrapper,
                                      num_actions=env.action_space.n)
        U.load_state(os.path.join(args.model_dir, "saved"))
        gaze_model.load_weights(
            'baselines/DeepqWithGaze/ImgOnly_gazeModels/seaquest-dp0.4-DQN+BNonInput.hdf5'
        )

        num_episodes = 0
        video_recorder = None
        video_recorder = VideoRecorder(env,
                                       args.video,
                                       enabled=args.video is not None)
        obs = env.reset()
        if args.debug_mode:
            fig, axarr = plt.subplots(2, 3)
            plt.show(block=False)
            debug_embed_last_time = time.time()  # TODO this is temporary. delete it and its related code
            debug_embed_freq_sec = 600
        while True:
            if args.debug_mode and debug_gaze_in is not None:
                for i in range(4):
                    axarr[int(i / 2), i % 2].cla()
                    axarr[int(i / 2), i % 2].imshow(debug_gaze_in[0, :, :, i])
                axarr[1, 2].cla()
                axarr[1, 2].imshow(debug_gaze_in[0, :, :, 4])
Example #29
    simulator = sim.env
    if hasattr(simulator, "env"):
        simulator = simulator.env

    simulator.set_state(
        real.env.model.data.qpos.ravel(),
        real.env.model.data.qvel.ravel()
    )


# dataset_train = MujocoTraintestPusherSimpleDataset(DATASET_PATH, for_training=True)
# dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)

match_env(env_real, env_simplus)
match_env(env_real, env_sim)
video_recorder_real = VideoRecorder(
    env_real, 'real.mp4', enabled=True)
video_recorder_simplus = VideoRecorder(
    env_simplus, 'sim+.mp4', enabled=True)
video_recorder_sim = VideoRecorder(
    env_sim, 'sim.mp4', enabled=True)

video_recorders = [video_recorder_real, video_recorder_simplus, video_recorder_sim]

# for i, data in enumerate(dataloader_train):
for i in range(40):
    for j in range(50):
        env_real.render()
        env_simplus.render()
        env_sim.render()

        # action = data["actions"][0, j].numpy()
Example #30
class VideoWrapper(Wrapper):
    """Creates videos from wrapped environment by called render after each timestep."""
    def __init__(self, env, directory, single_video=True):
        """

        :param env: (gym.Env) the wrapped environment.
        :param directory: (str) the output directory.
        :param single_video: (bool) if True, generates a single video file, with episodes
                             concatenated. If False, a new video file is created for each episode.
                             Usually a single video file is what is desired. However, if one is
                             searching for an interesting episode (perhaps by looking at the
                             metadata), saving to different files can be useful.
        """
        super(VideoWrapper, self).__init__(env)
        self.episode_id = 0
        self.video_recorder = None
        self.single_video = single_video

        self.directory = osp.abspath(directory)

        # Make sure to not put multiple different runs in the same directory,
        # if the directory already exists
        error_msg = "You're trying to use the same directory twice, " \
                    "this would result in files being overwritten"
        assert not os.path.exists(self.directory), error_msg
        os.makedirs(self.directory, exist_ok=True)

    def _step(self, action):
        obs, rew, done, info = self.env.step(action)
        if done:
            winners = [i for i, d in info.items() if 'winner' in d]
            metadata = {'winners': winners}
            self.video_recorder.metadata.update(metadata)
        self.video_recorder.capture_frame()
        return obs, rew, done, info

    def _reset(self):
        self._reset_video_recorder()
        self.episode_id += 1
        return self.env.reset()

    def _reset_video_recorder(self):
        """Called at the start of each episode (by _reset). Always creates a video recorder
           if one does not already exist. When a video recorder is already present, it will only
           create a new one if `self.single_video == False`."""
        if self.video_recorder is not None:
            # Video recorder already started.
            if not self.single_video:
                # We want a new video for each episode, so destroy current recorder.
                self.video_recorder.close()
                self.video_recorder = None

        if self.video_recorder is None:
            # No video recorder -- start a new one.
            self.video_recorder = VideoRecorder(
                env=self.env,
                base_path=osp.join(self.directory,
                                   'video.{:06}'.format(self.episode_id)),
                metadata={'episode_id': self.episode_id},
            )

    def _close(self):
        if self.video_recorder is not None:
            self.video_recorder.close()
            self.video_recorder = None
        super(VideoWrapper, self)._close()
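A rough usage sketch for VideoWrapper, assuming the old-style gym Wrapper hooks it implements (_reset/_step/_close) are dispatched by the installed gym version, and an environment whose step() info maps each agent index to a dict (the winner bookkeeping in _step expects that layout); make_env() and pick_action() are placeholders:

env = VideoWrapper(make_env(), directory="videos/run-0", single_video=True)
obs = env.reset()                 # starts the recorder via _reset_video_recorder
done = False
while not done:
    obs, rew, done, info = env.step(pick_action(obs))   # one captured frame per step
env.close()                       # flushes the final video file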
Example #31
train_data = H5PYDataset(DATASET_PATH,
                         which_sets=('train', ),
                         sources=('s_transition_obs', 'r_transition_obs',
                                  'obs', 'actions'))
stream_train = DataStream(train_data,
                          iteration_scheme=SequentialScheme(
                              train_data.num_examples, batch_size))
valid_data = H5PYDataset(DATASET_PATH,
                         which_sets=('valid', ),
                         sources=('s_transition_obs', 'r_transition_obs',
                                  'obs', 'actions'))
stream_valid = DataStream(valid_data,
                          iteration_scheme=SequentialScheme(
                              train_data.num_examples, batch_size))

iterator = stream_train.get_epoch_iterator(as_dict=True)

data = next(iterator)
length = data["actions"].shape[1]

match_env(env, env2)
video_recorder = VideoRecorder(env, 'sim+backlash.mp4', enabled=True)
video_recorder2 = VideoRecorder(env2, 'sim+.mp4', enabled=True)

for i, data in enumerate(stream_train.get_epoch_iterator(as_dict=True)):
    for j in range(length):
        action = data["actions"]
        video_recorder.capture_frame()
        video_recorder2.capture_frame()
        new_obs, reward, done, info = env.step(action)
        new_obs2, reward2, done2, info2 = env2.step(action)

    if i == 4:
        break

video_recorder.close()
video_recorder.enabled = False
Example #32
# train_data = H5PYDataset(
#     DATASET_PATH, which_sets=('train',), sources=('s_transition_obs','r_transition_obs', 'obs', 'actions')
# )
# stream_train = DataStream(train_data, iteration_scheme=SequentialScheme(train_data.num_examples, batch_size))
# valid_data = H5PYDataset(
#     DATASET_PATH, which_sets=('valid',), sources=('s_transition_obs','r_transition_obs', 'obs', 'actions')
# )
# stream_valid = DataStream(valid_data, iteration_scheme=SequentialScheme(train_data.num_examples, batch_size))

# iterator = stream_train.get_epoch_iterator(as_dict=True)
# data = next(iterator)

# length = data["actions"].shape[1]
length = 10

video_recorder = VideoRecorder(env, 'real_backlash.mp4', enabled=False)
video_recorder2 = VideoRecorder(env2, 'sim+.mp4', enabled=False)

# Only first six predicted by the lstm
num_obs = 10
array = np.zeros((2, 100, num_obs))

for i, data in enumerate(range(1)):
    env.reset()
    env2.reset()
    match_env(env, env2)
    new_obs = env.unwrapped._get_obs()
    new_obs2 = env2.unwrapped._get_obs()

    for j in range(100):
        # env.render()
Example #33

def match_env(real, sim):
    # set the simulator's state (sim) to match the real robot's state (real)
    sim.env.set_state(real.env.model.data.qpos.ravel(),
                      real.env.model.data.qvel.ravel())


dataset_train = MujocoTraintestPusherSimpleDataset(DATASET_PATH,
                                                   for_training=True)
dataloader_train = DataLoader(dataset_train,
                              batch_size=BATCH_SIZE,
                              shuffle=True)

match_env(env_real, env_sim)
video_recorder = VideoRecorder(env_real, 'real.mp4', enabled=True)
video_recorder2 = VideoRecorder(env_sim, 'sim.mp4', enabled=True)

for i, data in enumerate(dataloader_train):
    for j in range(50):
        env_sim.render()
        env_real.render()

        action = data["actions"][0, j].numpy()

        video_recorder.capture_frame()
        video_recorder2.capture_frame()

        obs_real, _, _, _ = env_real.step(action.copy())
        obs_simp, _, _, _ = env_sim.step(action.copy())