Example #2
def replay_states(path):
    f, i, w = sample_parameters(mode)
    # note: the `path` argument is overridden with a local model file
    path = os.path.join(os.getcwd(), 'windyslope.xml')
    model = load_model_from_path(path)
    randomize_dynamics(model, friction=f, insidebox=i, wind=w)
    env = WindySlope(model, mode.REAL, should_render=should_render)
    env.reset()
    with open('traj100-sample10.npz', 'rb') as f:
        states = np.load(f)

    if env.should_record:
        rec = VideoRecorder(env, path='/tmp/video/windyslope-predict-sample.mp4')

    for e in range(len(states)):
        episode = states[e]
        for i in range(len(episode)):
            qpos = episode[i][:3]
            env.data.qpos[:3] = qpos
            mat = episode[i][3:12]
            mat = np.asarray(mat).astype(np.float64)
            quat = np.empty(4)
            functions.mju_mat2Quat(quat, mat)
            print('quat: ', quat)
            env.data.qpos[3:] = quat
            #env.data.qvel[:3] = states[i][12:15]
            #env.data.qvel[3:] = states[i][15:18]
            
            #env.sim.forward()
            obs = env.get_observations(env.model, env.data)
            print('states:', episode[i])
            print('obs:', obs)
            #assert np.allclose(obs, states[i])
            env.render()
            if env.should_record:
                rec.capture_frame()
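For reference, functions.mju_mat2Quat converts a flattened row-major 3x3 rotation matrix into a MuJoCo quaternion in (w, x, y, z) order. A minimal self-contained sketch of the conversion used above, assuming mujoco-py is installed:

import numpy as np
from mujoco_py import functions

mat = np.eye(3).flatten()          # identity rotation, flattened row-major
quat = np.empty(4)
functions.mju_mat2Quat(quat, mat)  # fills `quat` in place
print(quat)                        # [1. 0. 0. 0.], the identity quaternion (w, x, y, z)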
Example #3
def record_video(env,
                 policy,
                 file_name,
                 number_of_resets=1,
                 max_time_steps=None):
    """
    Records a video of a policy for a specified environment
    :param env: (causal_world.CausalWorld) the environment to use for
                                           recording.
    :param policy: the policy to be evaluated
    :param file_name: (str) full path where the video is being stored.
    :param number_of_resets: (int) the number of resets/episodes to be viewed
    :param max_time_steps: (int) the maximum number of time steps per episode
    :return:
    """
    recorder = VideoRecorder(env, "{}.mp4".format(file_name))
    for reset_idx in range(number_of_resets):
        policy.reset()
        obs = env.reset()
        recorder.capture_frame()
        if max_time_steps is not None:
            for i in range(max_time_steps):
                desired_action = policy.act(obs)
                obs, reward, done, info = env.step(action=desired_action)
                recorder.capture_frame()
        else:
            while True:
                desired_action = policy.act(obs)
                obs, reward, done, info = env.step(action=desired_action)
                recorder.capture_frame()
                if done:
                    break
    recorder.close()
    return
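A hedged usage sketch for record_video: RandomPolicy below is a hypothetical stand-in exposing the reset()/act() interface the helper expects, and CartPole-v1 substitutes for a CausalWorld environment purely for illustration.

import gym

class RandomPolicy:
    """Hypothetical policy stub exposing the reset()/act() API used above."""

    def __init__(self, action_space):
        self.action_space = action_space

    def reset(self):
        pass  # no internal state to reset

    def act(self, obs):
        return self.action_space.sample()

env = gym.make('CartPole-v1')
record_video(env, RandomPolicy(env.action_space), '/tmp/cartpole_demo',
             number_of_resets=2, max_time_steps=100)
env.close()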
Example #4
    def mutate(self, weights, record):
        """ Mutate the input weights and evaluate their performance against
        the weights of the previous generation. """
        recorder = VideoRecorder(self.env,
                                 path=self.video_path) if record else None
        self.elite.set_weights(weights)
        self.oponent.set_weights(weights)
        perturbations = self.oponent.mutate(self.config['mutation_power'])

        _, oponent_reward1, ts1 = self.play_game(self.elite,
                                                 self.oponent,
                                                 recorder=recorder)
        oponent_reward2, _, ts2 = self.play_game(self.oponent,
                                                 self.elite,
                                                 recorder=recorder)

        if record:
            recorder.close()

        return {
            'total_reward': np.mean([oponent_reward1, oponent_reward2]),
            'timesteps_total': ts1 + ts2,
            'video': None if not record else wandb.Video(self.video_path),
            'noise': perturbations
        }
Example #5
def test_no_frames():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.close()
    assert rec.empty
    assert rec.functional
    assert not os.path.exists(rec.path)
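This snippet appears to come from gym's own test suite; BrokenRecordableEnv is an environment that advertises rgb_array rendering but never returns a frame, so the recorder stays empty. A rough sketch of such a test double (the exact gym source may differ):

class BrokenRecordableEnv:
    """Advertises rgb_array rendering but never produces a frame."""
    metadata = {'render.modes': ['rgb_array']}

    def render(self, mode=None):
        return None  # no frame: a VideoRecorder capturing this env stays empty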
Example #6
def start(env, agent: AgentBase):
    global video_recorder
    scores = []
    total_steps = 0
    video_recorder = None
    video_enabled = True
    video_ext = determine_extension(env)
    if not video_ext:
        video_enabled = False

    total_start_time = time.time()
    for episode in range(1, episodes):
        episode_start_time = time.time()

        if (episode % video_frequency) == 0:
            video_recorder = VideoRecorder(env, video_dir + "/{}{}".format(episode, video_ext), enabled=video_enabled)

        score, steps = run_episode(env, agent, video_recorder)

        scores.append(score)
        total_steps += steps

        if episode_log_frequency > 0 and (episode + 1) % episode_log_frequency == 0:
            log_episode_summary(episodes, episode, score, steps, time.time() - episode_start_time,
                                time.time() - total_start_time)

        if episode % plot_frequency == 0:
            plot([i for i in range(episode)], scores)

        if video_recorder:
            video_recorder.close()
            video_recorder = None
Example #7
def replay_save_video(env_id, policy, path_vids):

    if env_id == 'HalfCheetah-v2':
        from configs import cheetah_config
        nb_bootstrap, nb_explorations, nb_tests, nb_timesteps, offline_eval, controller, representer, \
        nb_rep, engineer_goal, goal_space, initial_space, knn, noise, nb_weights = cheetah_config()
    elif env_id == 'MountainCarContinuous-v0':
        from configs import cmc_config
        nb_bootstrap, nb_explorations, nb_tests, nb_timesteps, offline_eval, controller, representer, \
        nb_rep, engineer_goal, goal_space, initial_space, knn, noise, nb_weights = cmc_config()

    env = gym.make(env_id)
    vid_env = VideoRecorder(env=env, path=path_vids)
    obs = env.reset()
    rew = np.zeros([nb_timesteps + 1])
    done = False
    for t in range(nb_timesteps):
        if done:
            break
        act = controller.step(policy, obs).reshape(1, -1)
        out = env.step(np.copy(act))
        env.render()
        # vid_env.capture_frame()
        obs = out[0]
        rew[t + 1] = out[1]
        done = out[2]
    print('Run performance: ', np.nansum(rew))
    vid_env.close()
Example #8
class RecordedEnv(gym.Wrapper):
    def __init__(self, env):
        super().__init__(env)
        self.start_of_episode = None

    def reset(self):
        self.start_of_episode = True
        return self.env.reset()

    def step(self, action, filename='', record_episode=False):
        if record_episode and self.start_of_episode:
            self.recorder = VideoRecorder(self.env, path=filename)
        self.start_of_episode = False

        if hasattr(self, 'recorder'):
            self.recorder.capture_frame()

        next_state, reward, done, info = self.env.step(action)
        if hasattr(self, 'recorder') and done:
            self.recorder.close()  # close and save video at end of episode
            del self.recorder

        return next_state, reward, done, info

    def close(self):
        if hasattr(self, 'recorder'):
            self.recorder.capture_frame()
            self.recorder.close()
            del self.recorder
        self.start_of_episode = True
        return self.env.close()
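A short usage sketch for the RecordedEnv wrapper, with CartPole-v1 as a stand-in environment and an arbitrary output path:

import gym

env = RecordedEnv(gym.make('CartPole-v1'))
state = env.reset()
done = False
while not done:
    action = env.action_space.sample()
    # the wrapper's extended step() starts the recorder on the first call
    state, reward, done, info = env.step(
        action, filename='/tmp/episode.mp4', record_episode=True)
env.close()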
Example #10
def perform_debug_sequence(sequence_name,
                           env,
                           walker_type,
                           sequence_of_actions,
                           action_repeat=10,
                           has_gravity=False):
    video_folder = os.path.join(debug_folder, walker_type)
    os.makedirs(video_folder, exist_ok=True)

    if not has_gravity:
        env.world.gravity = (0, 0)
    else:
        env.world.gravity = (0, -10)

    video_recorder = VideoRecorder(
        env,
        os.path.join(video_folder, sequence_name + ".mp4"),
    )  # Stump Tracks
    env.reset()

    for action in sequence_of_actions:
        for i in range(action_repeat):
            _, _, d, _ = env.step(action)
            video_recorder.capture_frame()
            time.sleep(0.01)
            video_recorder.capture_frame()
    video_recorder.close()
Example #11
def record_video_of_episode(episode,
                            file_name,
                            env_wrappers=np.array([]),
                            env_wrappers_args=np.array([])):
    """
     Records a video of a logged episode for a specified environment

     :param episode: (Episode) the logged episode
     :param file_name: (str) full path where the video is being stored.
     :param env_wrappers: (list) a list of gym wrappers
     :param env_wrappers_args: (list) a list of kwargs for the gym wrappers
     :return:
     """
    actual_skip_frame = episode.world_params["skip_frame"]
    env = get_world(episode.task_name,
                    episode.task_params,
                    episode.world_params,
                    enable_visualization=False,
                    env_wrappers=env_wrappers,
                    env_wrappers_args=env_wrappers_args)
    env.set_starting_state(episode.initial_full_state, check_bounds=False)
    recorder = VideoRecorder(env, "{}.mp4".format(file_name))
    recorder.capture_frame()
    for time, observation, reward, action in zip(episode.timestamps,
                                                 episode.observations,
                                                 episode.rewards,
                                                 episode.robot_actions):
        for _ in range(actual_skip_frame):
            env.step(action)
            recorder.capture_frame()
    recorder.close()
    env.close()
Example #12
    def record():
        env = gym.make("CartPole-v1")
        rec = VideoRecorder(env)
        env.reset()
        rec.capture_frame()

        rec_path = rec.path
        proc = rec.encoder.proc

        assert proc.poll() is None  # subprocess is running

        # The function ends without an explicit `rec.close()` call;
        # the Python interpreter will implicitly `del rec` during garbage collection
        return rec_path, proc
Example #13
def record_rollout(env, horizon, policy, record_file_path):
    """
    This is the recording function for the runner class which samples one episode with a specified length
    using the provided policy and records it in a video.


    Parameters
    ---------
    horizon: Int
        The task horizon/ episode length.
    policy: ModelBasedBasePolicy or ModelFreeBasePolicy
        The policy to be used in collecting the episodes from the different agents.
    record_file_path: String
        specified the file path to save the video that will be recorded in.
    """
    recorder = VideoRecorder(env, record_file_path + '.mp4')
    observations = env.reset()
    for t in range(horizon):
        recorder.capture_frame()
        if not isinstance(policy, ModelFreeBasePolicy):
            action_to_execute, expected_obs, expected_reward = policy.act(
                observations, t)
        else:
            action_to_execute = policy.act(observations, t)
        observations, reward, done, info = env.step(action_to_execute)
    recorder.capture_frame()
    recorder.close()
    return
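A usage sketch for record_rollout, assuming env is an already-constructed gym environment and that ModelFreeBasePolicy can be subclassed with a no-argument base constructor (its real signature may differ); the random policy is purely illustrative:

class RandomMFPolicy(ModelFreeBasePolicy):
    """Hypothetical model-free policy returning random actions."""

    def __init__(self, action_space):
        super().__init__()            # assumed no-argument base constructor
        self.action_space = action_space

    def act(self, observations, t):
        return self.action_space.sample()

record_rollout(env, horizon=200,
               policy=RandomMFPolicy(env.action_space),
               record_file_path='/tmp/rollout')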
Example #14
def show_smart_agent():
    env = gym.make('Acrobot-v1')
    recorder = VideoRecorder(env, path='./video.mp4', enabled=True)
    state = env.reset()

    for t in range(1000):
        recorder.capture_frame()
        action, _ = policy.act(state)
        env.render()
        state, reward, done, _ = env.step(action)
        if done:
            break
        time.sleep(0.05)

    recorder.close()
    env.close()
Example #15
class RecordMonitor(Thread):
    def __init__(self, env, monitor_path):
        Thread.__init__(self)
        # Record the starting time
        self.start_time = time.time()

        self.rec = VideoRecorder(env, path=monitor_path)

    def reset_timer(self):
        self.start_time = time.time()

    def get_rec(self):
        self.rec.capture_frame()  # Calling this at a lower level makes the control inaccurate!

    def stop(self):
        self.rec.close()
Example #16
    def evaluate(self, weights):
        """ Evaluate weights by playing against a random policy. """
        recorder = VideoRecorder(self.env, path=self.video_path_eval)
        self.elite.set_weights(weights)
        reward, _, ts = self.play_game(self.elite,
                                       RandomPolicy(
                                           self.config['number_actions']),
                                       recorder=recorder,
                                       eval=True)
        recorder.close()
        return {
            'total_reward': reward,
            'timesteps_total': ts,
            'video': wandb.Video(self.video_path_eval),
        }
Example #17
def robo_view():
    # env = AcrobotEnv()
    env = gym.make('Acrobot-v1')
    video = VideoRecorder(env, path='./acrobot.mp4', enabled=True)
    state = env.reset()

    for i in range(10000):
        video.capture_frame()
        action, _ = model.get_action(state)
        env.render()
        state, rewards, finish, _ = env.step(action)
        if finish:
            break
        time.sleep(0.1)
    video.close()
    env.close()
Example #19
    def _predict(self, model, video_path):
        """Run predictions on a trained RL model.
        """

        # include the MPI rank in the filename so parallel workers do not
        # overwrite each other's videos
        vr = VideoRecorder(env=self._env,
                           path="{}/rl_out_{}.mp4".format(video_path, MPI.COMM_WORLD.Get_rank()),
                           enabled=True)
        obs = self._env.reset()
        for i in range(1000):
            action, _states = model.predict(obs)
            obs, rewards, dones, info = self._env.step(action)
            if dones:
                obs = self._env.reset()
            self._env.render(mode='rgb_array')
            vr.capture_frame()
        vr.close()
        self._env.close()
Example #20
    def __init__(
        self,
        xml_file: Path,
        goals: List[GoalSpec],
        starts: Dict[str, Box],
        steps_per_action: int = 300,
        obs_type: str = None,
        render: bool = False,
        record: bool = False,
        record_freq: int = None,
        render_freq: int = None,
        record_path: Path = None,
    ):
        self.starts = starts
        self.goals_specs = goals
        self.goals = None
        self._time_steps = 0
        if not xml_file.is_absolute():
            xml_file = get_xml_filepath(xml_file)

        self._obs_type = obs_type

        # required for OpenAI Gym code; render.modes is expected to be a list
        self.metadata = {'render.modes': ['rgb_array']}
        self.reward_range = -np.inf, np.inf
        self.spec = None

        self.video_recorder = None
        self._record = any([record, record_path, record_freq])
        self._render = any([render, render_freq])
        self.record_freq = record_freq or 20
        self.render_freq = render_freq or 20
        record_path = record_path or '/tmp/training-video'
        self.steps_per_action = steps_per_action
        self._block_name = 'block0'
        self._finger_names = ['hand_l_distal_link', 'hand_r_distal_link']

        if self._record:
            self.video_recorder = VideoRecorder(
                env=self,
                base_path=record_path,
                enabled=True,
            )

        super().__init__(str(xml_file), frame_skip=self.record_freq)
        self.initial_state = self.sim.get_state()
Example #21
    def demoRecordPushJoints(self, raw_path="videos/KukaPushJoints"):
        file = open("KukaPushJointsTrajectory2.txt", 'r')
        env = self.env
        test_rollouts = 5
        goals = [[0.68, -0.18, 0.85], [0.60, -0.3, 0.85], [0.72, -0.28, 0.85],
                 [0.58, -0.3, 0.85], [0.62, -0.25, 0.85]]
        recorder = VideoRecorder(env.env.env, base_path=raw_path)
        jointsTrajectory = re.sub(r"([^[])\s+([^]])", r"\1 \2",
                                  file.readline())
        jointsTrajectory = np.array(literal_eval(jointsTrajectory))
        env.reset()
        env.set_goal(np.array(goals[1]))
        for j in range(len(jointsTrajectory)):
            self.env.stepJoints(jointsTrajectory[j])
            recorder.capture_frame()
        recorder.close()
        file.close()

        print("... done.")
Example #22
def play(env, model, video_path, num_episodes, timesteps, metadata):
    video_recorder = None
    for i_episodes in range(num_episodes):
        video_recorder = VideoRecorder(
            env=env, path=video_path, metadata=metadata, enabled=video_path is not None)
        obs = env.reset()
        for t in range(timesteps):
            obs = [np.array([[list(obs)]])]
            video_recorder.capture_frame()
            action = model.predict(obs)[0]
            obs, rew, done, info = env.step(action)
            env.render()
            theta.append(obs[0])
            theta_dot.append(obs[1])
            actions.append(action[0])
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                num_episodes += 1
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
                break
    env.close()
    return theta
Example #23
def save_video(agent, env_name, video_path):
    num_episodes = 0
    # set up environment
    env = gym.make(env_name)
    state = env.reset()
    state = torch.tensor(state)

    # set up video recoder
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
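The example above is truncated after constructing the recorder. A plausible continuation, modeled on the capture/step/close pattern of the sibling examples (the agent.act interface is an assumption):

    # hedged continuation, not part of the original snippet
    done = False
    while not done:
        video_recorder.capture_frame()
        action = agent.act(state)                        # assumed agent API
        next_state, reward, done, _ = env.step(action)
        state = torch.tensor(next_state)
    video_recorder.close()
    env.close()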
Example #24
    def reset_recorder(self, record_path: Path):
        record_path.mkdir(parents=True, exist_ok=True)
        print(f'Recording video to {record_path}.mp4')
        video_recorder = VideoRecorder(
            env=self,
            base_path=str(record_path),
            enabled=True,
        )
        closer.Closer().register(video_recorder)
        return video_recorder
Example #25
    def _reset_video_recorder(self):
        """Called at the start of each episode (by _reset). Always creates a video recorder
           if one does not already exist. When a video recorder is already present, it will only
           create a new one if `self.single_video == False`."""
        if self.video_recorder is not None:
            # Video recorder already started.
            if not self.single_video:
                # We want a new video for each episode, so destroy current recorder.
                self.video_recorder.close()
                self.video_recorder = None

        if self.video_recorder is None:
            # No video recorder -- start a new one.
            self.video_recorder = VideoRecorder(
                env=self.env,
                base_path=osp.join(self.directory,
                                   "video.{:06}".format(self.episode_id)),
                metadata={"episode_id": self.episode_id},
            )
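For context, a minimal sketch of the wrapper class this method could live in, inferring the attributes (directory, single_video, episode_id, video_recorder) from the method body:

import os.path as osp
import gym

class VideoWrapper(gym.Wrapper):
    """Hypothetical host class for _reset_video_recorder above."""

    def __init__(self, env, directory, single_video=False):
        super().__init__(env)
        self.directory = directory
        self.single_video = single_video
        self.episode_id = 0
        self.video_recorder = None

    def reset(self, **kwargs):
        self._reset_video_recorder()   # the method shown above
        self.episode_id += 1
        return self.env.reset(**kwargs)

    def step(self, action):
        # reset() must be called before step() so a recorder exists
        obs, reward, done, info = self.env.step(action)
        self.video_recorder.capture_frame()
        return obs, reward, done, info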
Example #26
    def sample(self, horizon, policy, record_fname=None):
        """Samples a rollout from the agent.

        Arguments: 
            horizon: (int) The length of the rollout to generate from the agent.
            policy: (policy) The policy that the agent will use for actions.
            record_fname: (str/None) The name of the file to which a recording of the rollout
                will be saved. If None, the rollout will not be recorded.

        Returns: (dict) A dictionary containing data from the rollout.
            The keys of the dictionary are 'obs', 'ac', and 'reward_sum'.
        """
        video_record = record_fname is not None
        recorder = None if not video_record else VideoRecorder(
            self.env, record_fname)

        times, rewards = [], []
        O, A, reward_sum, done = [self.env.reset()], [], 0, False

        policy.reset()
        for t in range(horizon):
            if video_record:
                recorder.capture_frame()
            start = time.time()
            A.append(policy.act(O[t], t))
            times.append(time.time() - start)

            if self.noise_stddev is None:
                obs, reward, done, info = self.env.step(A[t])
            else:
                action = A[t] + np.random.normal(
                    loc=0, scale=self.noise_stddev, size=[self.dU])
                action = np.minimum(
                    np.maximum(action, self.env.action_space.low),
                    self.env.action_space.high)
                obs, reward, done, info = self.env.step(action)
            O.append(obs)
            reward_sum += reward
            rewards.append(reward)
            if done:
                break

        if video_record:
            recorder.capture_frame()
            recorder.close()

        print("Average action selection time: ", np.mean(times))
        print("Rollout length: ", len(A))

        return {
            "obs": np.array(O),
            "ac": np.array(A),
            "reward_sum": reward_sum,
            "rewards": np.array(rewards),
        }
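A usage sketch, assuming `agent` is an instance of the class this method belongs to and `policy` implements reset()/act() as used above:

rollout = agent.sample(horizon=100, policy=policy,
                       record_fname='/tmp/rollout.mp4')
print('return:', rollout['reward_sum'],
      '| steps:', rollout['obs'].shape[0] - 1)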
Example #27
def agent_sample(env, horizon, policy, record_fname):
    solution = None
    video_record = record_fname is not None
    recorder = None if not video_record else VideoRecorder(env, record_fname)

    times, rewards = [], []
    O, A, reward_sum, done = [env.reset()], [], 0, False

    policy.reset()

    for t in range(horizon):
        if video_record:
            recorder.capture_frame()

        start = time.time()

        solution = policy.act(O[t], t)
        A.append(solution)

        times.append(time.time() - start)

        # === Do action on real jaco ===
        # move_jaco_real(A[t] + O[t][0:9])

        print("ac: ", A[t] + O[t][0:9])
        obs, reward, done, info = env.step(A[t] + O[t][0:9])
        print("obs: ", obs)
        print("reward: ", reward)

        # === Get obs from real jaco ===
        # angles = get_jaco_angles()
        # obs_angles = real_to_sim(angles)
        # O[t][0:9] = obs_angles

        # === sync ===
        # move_mojuco_to_real(env)

        screen = env.render(mode='rgb_array')
        cv2.imshow('image', cv2.cvtColor(screen, cv2.COLOR_BGR2RGB))
        if (cv2.waitKey(25) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

        O.append(obs)
        reward_sum += reward
        rewards.append(reward)
        if done:
            break

        # === stop ===
        ipdb.set_trace()

    if video_record:
        recorder.capture_frame()
        recorder.close()
Example #28
def _record_video(env, agent, output_path):
    """Records a video of an agent playing a gaming simulation.

    Args:
      env: The environment for the agent to act in.
      agent: An RL agent created by _create_agent.
      output_path (str): The directory path of where to save the recording.
    """
    video_recorder = VideoRecorder(env, RECORDING_NAME)
    _play(agent, env, False, recorder=video_recorder)
    video_recorder.close()
    env.close()

    # Check if output directory is google cloud and save there if so.
    if output_path.startswith("gs://"):
        [bucket_name, blob_path] = output_path[5:].split("/", 1)
        storage_client = storage.Client()
        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.blob(blob_path + RECORDING_NAME)
        blob.upload_from_filename(RECORDING_NAME)
Example #29
def play_with_buffer(env, pi, num_episodes=3, video_path=None):
    # note: the `video_path` argument is overridden with a hardcoded path
    video_path = 'videos/REINFORCE_best.mp4'
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    rep = ReplayMemory(pi.config)
    obs_mask = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
    for e_i in range(num_episodes):
        s = env.reset()
        z = np.matmul(obs_mask, s)
        rep.add(z, 0, 0)
        # env.unwrapped.render()
        done = False
        while not done:
            a = int(pi(rep.getState()))
            s_prime, r_t, done, _ = env.step(a)
            z_prime = np.matmul(obs_mask, s_prime)
            rep.add(z_prime, r_t, a)
            video_recorder.capture_frame()
            # env.render()
            # s = s_prime
            # z = z_prime
    video_recorder.close()
    # video_recorder.enabled = False
    env.close()
Example #30
class VideoLogger(Callback):
    def __init__(self, env: MultiagentVecEnv, save_folder: str):
        super(VideoLogger, self).__init__()
        self.env = env
        self.save_folder = save_folder

        os.makedirs(save_folder, exist_ok=True)
        self.recorder = VideoRecorder(env, path=f'{save_folder}/0.mp4')

    def before_step(self,
                    logs: Optional[dict] = None,
                    actions: Optional[Dict[str, torch.Tensor]] = None,
                    action_distributions: Optional[Dict[str,
                                                        Distribution]] = None):
        self.recorder.capture_frame()

    def after_train(self,
                    logs: Optional[dict] = None,
                    obs: Optional[Dict[str, torch.Tensor]] = None,
                    rewards: Optional[Dict[str, torch.Tensor]] = None,
                    dones: Optional[Dict[str, torch.Tensor]] = None,
                    infos: Optional[Dict[str, torch.Tensor]] = None):
        # If there is just one env, save each episode to a different file;
        # otherwise, save the whole video at the end.
        if self.env.num_envs == 1:
            if logs['env_done']:
                # Save video and make a new recorder
                self.recorder.close()
                self.recorder = VideoRecorder(
                    self.env,
                    path=f'{self.save_folder}/{logs["episodes"]}.mp4')

    def on_train_end(self):
        self.recorder.close()
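A hedged sketch of wiring VideoLogger into a training loop; Trainer is a hypothetical driver that is assumed to invoke before_step()/after_train()/on_train_end() on each callback at the matching points of its loop:

callbacks = [VideoLogger(env, save_folder='/tmp/videos')]
trainer = Trainer(env, callbacks=callbacks)   # hypothetical trainer class
trainer.train()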
Example #31
def run_policy(env,
               get_action,
               env_params_list,
               max_ep_len=None,
               episode_id=0,
               record=False,
               recording_path=None,
               no_render=False,
               use_baselines=False):
    if record:
        if os.name == "nt":
            full_path = os.path.join(pathlib.Path().absolute(), recording_path)
            full_path_len = len(full_path)
            nb_char_to_remove = full_path_len - 245
            if nb_char_to_remove > 0:
                recording_path = recording_path[:-nb_char_to_remove]
        video_recorder = VideoRecorder(env,
                                       recording_path + "_ep" +
                                       str(episode_id) + ".mp4",
                                       enabled=True)

    if use_baselines:
        env.get_raw_env().set_environment(**env_params_list[episode_id])
    else:
        env.set_environment(**env_params_list[episode_id])

    if use_baselines:
        _, o = env.reset()
    else:
        o = env.reset()

    r, d, ep_ret, ep_len, n = 0, False, 0, 0, 0
    while True:
        if record and video_recorder.enabled:
            video_recorder.capture_frame()
        if not record and not no_render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, i = env.step(a)
        if use_baselines:
            ep_ret += i[0]["original_reward"][0]
        else:
            ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            print('Episode %d \t EpRet %.3f \t EpLen %d' %
                  (episode_id, ep_ret, ep_len))
            if record and video_recorder.enabled:
                video_recorder.close()
                video_recorder.enabled = False
            break
    return ep_ret
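Finally, a usage sketch for run_policy; env and get_action are assumed to exist already (get_action wrapping a trained policy), and the parameter dicts in env_params_list are hypothetical placeholders:

env_params_list = [{'roughness': 0.0} for _ in range(5)]   # placeholder params
returns = [run_policy(env, get_action, env_params_list,
                      max_ep_len=2000, episode_id=i,
                      record=True, recording_path='videos/eval')
           for i in range(5)]
print('mean return:', np.mean(returns))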