def __init__(self, env: MultiagentVecEnv, save_folder: str):
    """Create a video logger that records `env` into `save_folder`.

    The destination folder is created if missing, and an initial recorder
    is opened for the first output file (`0.mp4`).
    """
    super(VideoLogger, self).__init__()
    # Ensure the destination exists before the recorder tries to write there.
    os.makedirs(save_folder, exist_ok=True)
    self.env = env
    self.save_folder = save_folder
    self.recorder = VideoRecorder(env, path=f'{save_folder}/0.mp4')
def replay_states(path):
    """Replay logged WindySlope states in the simulator, optionally recording.

    :param path: path to the recorded-states file; falls back to
        'traj100-sample10.npz' when falsy.  (Fix: the parameter was
        previously clobbered with the model XML path and never used.)
    """
    friction, insidebox, wind = sample_parameters(mode)
    # Fix: keep the model path in its own variable instead of overwriting
    # the `path` argument.
    model_path = os.path.join(os.getcwd(), './windyslope.xml')
    model = load_model_from_path(model_path)
    randomize_dynamics(model, friction=friction, insidebox=insidebox, wind=wind)
    env = WindySlope(model, mode.REAL, should_render=should_render)
    env.reset()
    states_path = path or 'traj100-sample10.npz'
    # Fix: do not shadow the `friction` tuple variable `f` with the file handle.
    with open(states_path, 'rb') as states_file:
        states = np.load(states_file)
        if env.should_record:
            rec = VideoRecorder(env, path='/tmp/video/windyslope-predict-sample.mp4')
        for e in range(len(states)):
            episode = states[e]
            # Fix: loop index renamed so it no longer shadows `insidebox`'s `i`.
            for step_idx in range(len(episode)):
                # First 3 entries are the free-joint position.
                env.data.qpos[:3] = episode[step_idx][:3]
                # Entries 3..12 hold a 3x3 rotation matrix; convert to quaternion.
                mat = np.asarray(episode[step_idx][3:12]).astype(np.float64)
                quat = np.empty(4)
                functions.mju_mat2Quat(quat, mat)
                print('quat: ', quat)
                env.data.qpos[3:] = quat
                obs = env.get_observations(env.model, env.data)
                print('states:', episode[step_idx])
                print('obs:', obs)
                env.render()
                if env.should_record:
                    rec.capture_frame()
def record_video(env, policy, file_name, number_of_resets=1, max_time_steps=None):
    """
    Records a video of a policy for a specified environment

    :param env: (causal_world.CausalWorld) the environment to use for recording.
    :param policy: the policy to be evaluated
    :param file_name: (str) full path where the video is being stored.
    :param number_of_resets: (int) the number of resets/episodes to be viewed
    :param max_time_steps: (int) the maximum number of time steps per episode
    :return:
    """
    recorder = VideoRecorder(env, "{}.mp4".format(file_name))
    for _ in range(number_of_resets):
        policy.reset()
        obs = env.reset()
        recorder.capture_frame()
        step_count = 0
        # With a step limit, run exactly that many steps; otherwise run
        # until the episode reports done.
        while max_time_steps is None or step_count < max_time_steps:
            obs, reward, done, info = env.step(action=policy.act(obs))
            recorder.capture_frame()
            step_count += 1
            if max_time_steps is None and done:
                break
    recorder.close()
    return
def mutate(self, weights, record):
    """Mutate `weights` and score the mutated opponent against the elite.

    Two games are played with sides swapped; the opponent's mean reward and
    the total timesteps are reported, with an optional video recording.
    """
    recorder = VideoRecorder(self.env, path=self.video_path) if record else None
    self.elite.set_weights(weights)
    self.oponent.set_weights(weights)
    perturbations = self.oponent.mutate(self.config['mutation_power'])
    # Game 1: elite plays first; game 2: mutated opponent plays first.
    _, opp_reward_a, steps_a = self.play_game(self.elite, self.oponent, recorder=recorder)
    opp_reward_b, _, steps_b = self.play_game(self.oponent, self.elite, recorder=recorder)
    if record:
        recorder.close()
    return {
        'total_reward': np.mean([opp_reward_a, opp_reward_b]),
        'timesteps_total': steps_a + steps_b,
        'video': None if not record else wandb.Video(self.video_path),
        'noise': perturbations,
    }
def test_no_frames():
    """Closing a recorder that never captured a frame must leave no file."""
    rec = VideoRecorder(BrokenRecordableEnv())
    rec.close()
    # The recorder stayed functional even though nothing was captured,
    # and no output file should exist on disk.
    assert rec.empty
    assert rec.functional
    assert not os.path.exists(rec.path)
def start(env, agent: AgentBase):
    """Run training episodes, periodically recording video and plotting scores.

    :param env: the environment to run episodes in.
    :param agent: the agent that selects actions.
    """
    global video_recorder
    scores = []
    total_steps = 0
    video_recorder = None
    video_enabled = True
    video_ext = determine_extension(env)
    if not video_ext:
        video_enabled = False
    total_start_time = time.time()
    for episode in range(1, episodes):
        episode_start_time = time.time()
        if (episode % video_frequency) == 0:
            # Fix: finalize any previously-opened recorder before replacing
            # it; otherwise the earlier video file is never closed/saved.
            if video_recorder:
                video_recorder.close()
            video_recorder = VideoRecorder(env,
                                           video_dir + "/{}{}".format(episode, video_ext),
                                           enabled=video_enabled)
        score, steps = run_episode(env, agent, video_recorder)
        scores.append(score)
        total_steps += steps
        if episode_log_frequency > 0 and (episode + 1) % episode_log_frequency == 0:
            log_episode_summary(episodes, episode, score, steps,
                                time.time() - episode_start_time,
                                time.time() - total_start_time)
        if episode % plot_frequency == 0:
            plot(list(range(episode)), scores)
    if video_recorder:
        video_recorder.close()
        video_recorder = None
def replay_save_video(env_id, policy, path_vids):
    """Replay `policy` in `env_id` and save a video of the run.

    :param env_id: gym environment id ('HalfCheetah-v2' or
        'MountainCarContinuous-v0').
    :param policy: policy parameters consumed by the controller.
    :param path_vids: full path of the output video file.
    """
    if env_id == 'HalfCheetah-v2':
        from configs import cheetah_config
        nb_bootstrap, nb_explorations, nb_tests, nb_timesteps, offline_eval, controller, representer, \
            nb_rep, engineer_goal, goal_space, initial_space, knn, noise, nb_weights = cheetah_config()
    elif env_id == 'MountainCarContinuous-v0':
        # Fix: cmc_config was called without ever being imported (NameError).
        from configs import cmc_config
        nb_bootstrap, nb_explorations, nb_tests, nb_timesteps, offline_eval, controller, representer, \
            nb_rep, engineer_goal, goal_space, initial_space, knn, noise, nb_weights = cmc_config()
    env = gym.make(env_id)
    vid_env = VideoRecorder(env=env, path=path_vids)
    obs = env.reset()
    rew = np.zeros([nb_timesteps + 1])
    done = False
    for t in range(nb_timesteps):
        if done:
            break
        act = controller.step(policy, obs).reshape(1, -1)
        out = env.step(np.copy(act))
        env.render()
        # Fix: frames must be captured, otherwise the saved video is empty.
        vid_env.capture_frame()
        obs = out[0]
        rew[t + 1] = out[1]
        done = out[2]
    print('Run performance: ', np.nansum(rew))
    vid_env.close()
class RecordedEnv(gym.Wrapper):
    """Gym wrapper that can record a single episode to a video file on demand."""

    def __init__(self, env):
        super().__init__(env)
        self.start_of_episode = None

    def reset(self):
        """Mark the start of a new episode and reset the wrapped env."""
        self.start_of_episode = True
        return self.env.reset()

    def step(self, action, filename='', record_episode=False):
        """Step the env; if recording was requested, capture frames to `filename`."""
        # A recorder is created lazily on the first step of an episode
        # that asked to be recorded.
        if record_episode and self.start_of_episode:
            self.recorder = VideoRecorder(self.env, path=filename)
            self.start_of_episode = False
        if hasattr(self, 'recorder'):
            self.recorder.capture_frame()
        next_state, reward, done, info = self.env.step(action)
        # close and save video at end of episode
        if done and hasattr(self, 'recorder'):
            self.recorder.close()
            del self.recorder
        return next_state, reward, done, info

    def close(self):
        """Flush any open recorder, then close the wrapped env."""
        if hasattr(self, 'recorder'):
            self.recorder.capture_frame()
            self.recorder.close()
            del self.recorder
        self.start_of_episode = True
        return self.env.close()
def step(self, action, filename='', record_episode=False):
    """Step the wrapped env, optionally recording this episode to `filename`."""
    if record_episode and self.start_of_episode:
        # Open a recorder on the first step of a recorded episode.
        self.recorder = VideoRecorder(self.env, path=filename)
        self.start_of_episode = False
    recording = hasattr(self, 'recorder')
    if recording:
        self.recorder.capture_frame()
    next_state, reward, done, info = self.env.step(action)
    if recording and done:
        # close and save video at end of episode
        self.recorder.close()
        del self.recorder
    return next_state, reward, done, info
def perform_debug_sequence(sequence_name, env, walker_type, sequence_of_actions,
                           action_repeat=10, has_gravity=False):
    """Run a fixed action sequence in `env` and record it to a debug video.

    :param sequence_name: base name of the output .mp4 file.
    :param env: the environment to drive (exposes `world.gravity`).
    :param walker_type: subfolder name grouping videos per walker type.
    :param sequence_of_actions: iterable of actions, each repeated
        `action_repeat` times.
    :param action_repeat: number of env steps per action.
    :param has_gravity: when False, gravity is turned off.
    """
    # Fix: build paths with os.path.join instead of hard-coded Windows '\\'
    # separators so this also works on POSIX systems.
    video_folder = os.path.join(debug_folder, walker_type)
    os.makedirs(video_folder, exist_ok=True)
    if not has_gravity:
        env.world.gravity = (0, 0)
    else:
        env.world.gravity = (0, -10)
    video_recorder = VideoRecorder(
        env,
        os.path.join(video_folder, sequence_name + ".mp4"),
    )
    # Stump Tracks
    env.reset()
    for action in sequence_of_actions:
        for _ in range(action_repeat):
            _, _, d, _ = env.step(action)
            video_recorder.capture_frame()
            time.sleep(0.01)
    # One final frame showing the terminal state.
    video_recorder.capture_frame()
    video_recorder.close()
def record_video_of_episode(episode, file_name, env_wrappers=np.array([]),
                            env_wrappers_args=np.array([])):
    """
    Records a video of a logged episode for a specified environment

    :param episode: (Episode) the logged episode
    :param file_name: (str) full path where the video is being stored.
    :param env_wrappers: (list) a list of gym wrappers
    :param env_wrappers_args: (list) a list of kwargs for the gym wrappers

    :return:
    """
    actual_skip_frame = episode.world_params["skip_frame"]
    env = get_world(episode.task_name,
                    episode.task_params,
                    episode.world_params,
                    enable_visualization=False,
                    env_wrappers=env_wrappers,
                    env_wrappers_args=env_wrappers_args)
    env.set_starting_state(episode.initial_full_state, check_bounds=False)
    recorder = VideoRecorder(env, "{}.mp4".format(file_name))
    recorder.capture_frame()
    # Fix: the loop variable `time` shadowed the stdlib `time` module;
    # it and the other zipped values are unused, so mark them as such.
    for _time, _observation, _reward, action in zip(episode.timestamps,
                                                    episode.observations,
                                                    episode.rewards,
                                                    episode.robot_actions):
        # Each logged action was held for `actual_skip_frame` simulator steps.
        for _ in range(actual_skip_frame):
            env.step(action)
            recorder.capture_frame()
    recorder.close()
    env.close()
def record():
    """Start a recording and return WITHOUT closing it.

    The missing `rec.close()` is deliberate: callers use the returned
    path and encoder process to check what happens when the recorder is
    cleaned up by garbage collection instead of an explicit close.
    """
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    recorded_path = rec.path
    encoder_proc = rec.encoder.proc
    # subprocess is running
    assert encoder_proc.poll() is None
    # The function ends without an explicit `rec.close()` call
    # The Python interpreter will implicitly do `del rec` on garbage cleaning
    return recorded_path, encoder_proc
def record_rollout(env, horizon, policy, record_file_path):
    """
    Sample one episode of length `horizon` using `policy` and record it
    to a video file.

    Parameters
    ---------
    horizon: Int
        The task horizon/ episode length.
    policy: ModelBasedBasePolicy or ModelFreeBasePolicy
        The policy to be used in collecting the episodes from the
        different agents.
    record_file_path: String
        specified the file path to save the video that will be recorded in.
    """
    recorder = VideoRecorder(env, record_file_path + '.mp4')
    observations = env.reset()
    for t in range(horizon):
        recorder.capture_frame()
        # Model-based policies also return predictions; only the action
        # is needed here.
        if isinstance(policy, ModelFreeBasePolicy):
            action_to_execute = policy.act(observations, t)
        else:
            action_to_execute, expected_obs, expected_reward = policy.act(
                observations, t)
        observations, reward, done, info = env.step(action_to_execute)
    # Capture the final state before closing.
    recorder.capture_frame()
    recorder.close()
    return
def show_smart_agent():
    """Roll out the trained policy in Acrobot-v1 while recording to video.mp4."""
    env = gym.make('Acrobot-v1')
    recorder = VideoRecorder(env, path='./video.mp4', enabled=True)
    state = env.reset()
    for t in range(1000):
        recorder.capture_frame()
        action, _ = policy.act(state)
        env.render()
        state, reward, done, _ = env.step(action)
        if done:
            break
        time.sleep(0.05)
    # Fix: the recorder was never closed, so the captured frames were
    # never flushed and the video file was never finalized.
    recorder.close()
    env.close()
class RecordMonitor(Thread):
    """Thread-based helper that owns a VideoRecorder for an environment."""

    def __init__(self, env, monitor_path):
        Thread.__init__(self)
        # The starting time.
        # Fix: initialize start_time so reading it before reset_timer()
        # does not raise AttributeError.
        self.start_time = time.time()
        self.rec = VideoRecorder(env, path=monitor_path)

    def reset_timer(self):
        """Restart the elapsed-time reference point."""
        self.start_time = time.time()

    def get_rec(self):
        """Capture one frame from the wrapped environment."""
        # Capturing at a lower layer would make the timing control inaccurate!
        self.rec.capture_frame()

    def stop(self):
        """Finalize and close the video file."""
        self.rec.close()
def evaluate(self, weights):
    """Evaluate `weights` by playing one recorded game against a random policy."""
    recorder = VideoRecorder(self.env, path=self.video_path_eval)
    self.elite.set_weights(weights)
    random_opponent = RandomPolicy(self.config['number_actions'])
    reward, _, ts = self.play_game(self.elite, random_opponent,
                                   recorder=recorder, eval=True)
    recorder.close()
    return {
        'total_reward': reward,
        'timesteps_total': ts,
        'video': wandb.Video(self.video_path_eval),
    }
def robo_view():
    """Render and record the trained model acting in Acrobot-v1."""
    env = gym.make('Acrobot-v1')
    video = VideoRecorder(env, path='./acrobot.mp4', enabled=True)
    state = env.reset()
    for i in range(10000):
        video.capture_frame()
        action, _ = model.get_action(state)
        env.render()
        state, rewards, finish, _ = env.step(action)
        if finish:
            break
        time.sleep(0.1)
    # Fix: close the recorder so the captured frames are flushed to disk;
    # previously the video file was never finalized.
    video.close()
    env.close()
def after_train(self,
                logs: Optional[dict] = None,
                obs: Optional[Dict[str, torch.Tensor]] = None,
                rewards: Optional[Dict[str, torch.Tensor]] = None,
                dones: Optional[Dict[str, torch.Tensor]] = None,
                infos: Optional[Dict[str, torch.Tensor]] = None):
    """Roll the recorder over to a new file when a single-env episode ends.

    With a single env each episode is saved to its own file; with many
    envs the whole run is kept in one video saved at the end.
    """
    if self.env.num_envs != 1:
        return
    if logs['env_done']:
        # Save video and make a new recorder
        self.recorder.close()
        next_path = f'{self.save_folder}/{logs["episodes"]}.mp4'
        self.recorder = VideoRecorder(self.env, path=next_path)
def _predict(self, model, video_path):
    """Run predictions on trained RL model and record the rollout.

    :param model: trained model exposing `predict(obs)`.
    :param video_path: directory in which `rl_out.mp4` is written.
    """
    # Fix: the format string received an extra MPI-rank argument that was
    # silently ignored — every rank wrote to the same file. The unused
    # argument is removed; if per-rank files are wanted, the rank must be
    # included in the format string itself.
    vr = VideoRecorder(env=self._env,
                       path="{}/rl_out.mp4".format(video_path),
                       enabled=True)
    obs = self._env.reset()
    for i in range(1000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = self._env.step(action)
        if dones:
            obs = self._env.reset()
        self._env.render(mode='rgb_array')
        vr.capture_frame()
    vr.close()
    self._env.close()
def __init__(
        self,
        xml_file: Path,
        goals: List[GoalSpec],
        starts: Dict[str, Box],
        steps_per_action: int = 300,
        obs_type: str = None,
        render: bool = False,
        record: bool = False,
        record_freq: int = None,
        render_freq: int = None,
        record_path: Path = None,
):
    """Set up the MuJoCo task environment.

    :param xml_file: MuJoCo model file; resolved via get_xml_filepath when
        relative.
    :param goals: goal specifications; `self.goals` itself starts as None.
    :param starts: mapping of names to start Boxes (kept on self.starts).
    :param steps_per_action: simulator steps taken per agent action.
    :param obs_type: observation-type tag stored for subclass use.
    :param render: enable rendering; also implied by render_freq.
    :param record: enable video recording; also implied by record_path or
        record_freq.
    :param record_freq: recording frequency; defaults to 20 when falsy.
    :param render_freq: rendering frequency; defaults to 20 when falsy.
    :param record_path: base path for recorded videos; defaults to
        '/tmp/training-video'.
    """
    self.starts = starts
    self.goals_specs = goals
    self.goals = None
    self._time_steps = 0
    # Resolve a relative model file against the project's xml location.
    if not xml_file.is_absolute():
        xml_file = get_xml_filepath(xml_file)
    self._obs_type = obs_type
    # required for OpenAI code
    self.metadata = {'render.modes': 'rgb_array'}
    self.reward_range = -np.inf, np.inf
    self.spec = None
    self.video_recorder = None
    # Any of the record/render-related arguments switches the feature on.
    self._record = any([record, record_path, record_freq])
    self._render = any([render, render_freq])
    self.record_freq = record_freq or 20
    self.render_freq = render_freq or 20
    record_path = record_path or '/tmp/training-video'
    self.steps_per_action = steps_per_action
    self._block_name = 'block0'
    self._finger_names = ['hand_l_distal_link', 'hand_r_distal_link']
    if self._record:
        self.video_recorder = VideoRecorder(
            env=self,
            base_path=record_path,
            enabled=True,
        )
    # NOTE(review): frame_skip is tied to record_freq here — confirm this
    # coupling of recording frequency and simulator frame skip is intended.
    super().__init__(str(xml_file), frame_skip=self.record_freq)
    self.initial_state = self.sim.get_state()
def demoRecordPushJoints(self, raw_path="videos/KukaPushJoints"):
    """Replay a saved joint trajectory on the Kuka push task and record it.

    :param raw_path: base path (without extension) for the output video.
    """
    env = self.env
    goals = [[0.68, -0.18, 0.85], [0.60, -0.3, 0.85], [0.72, -0.28, 0.85],
             [0.58, -0.3, 0.85], [0.62, -0.25, 0.85]]
    recorder = VideoRecorder(env.env.env, base_path=raw_path)
    # Fix: close the trajectory file deterministically via a context manager.
    with open("KukaPushJointsTrajectory2.txt", 'r') as traj_file:
        # Collapse internal whitespace so literal_eval can parse the list.
        jointsTrajectory = re.sub(r"([^[])\s+([^]])", r"\1 \2",
                                  traj_file.readline())
    jointsTrajectory = np.array(literal_eval(jointsTrajectory))
    env.reset()
    env.set_goal(np.array(goals[1]))
    for joints in jointsTrajectory:
        self.env.stepJoints(joints)
        recorder.capture_frame()
    # Fix: the recorder was never closed, so the video was never written out.
    recorder.close()
    print("... done.")
def play(env, model, video_path, num_episodes, timesteps, metadata):
    """Run `model` in `env` for up to `num_episodes` episodes, recording video.

    Appends to `theta`, `theta_dot` and `actions` (defined outside this
    function) and returns `theta`.
    """
    video_recorder = None
    for i_episodes in range(num_episodes):
        # NOTE(review): a new recorder is opened every episode at the same
        # path, but is only closed in the `done` branch below — an episode
        # that never finishes leaves its recorder unclosed. Confirm intent.
        video_recorder = VideoRecorder(
            env=env,
            path=video_path,
            metadata=metadata,
            enabled=video_path is not None)
        obs = env.reset()
        for t in range(timesteps):
            # Wrap the observation into the nested list/array form that is
            # passed to model.predict.
            obs = [np.array([[list(obs)]])]
            video_recorder.capture_frame()
            action = model.predict(obs)[0]
            obs, rew, done, info = env.step(action)
            env.render()
            theta.append(obs[0])
            theta_dot.append(obs[1])
            actions.append(action[0])
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                # NOTE(review): incrementing here has no effect on the
                # already-materialized range(num_episodes) — confirm intent.
                num_episodes += 1
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
                break
    env.close()
    return theta
def save_video(agent, env_name, video_path):
    """Prepare an environment and a video recorder for `agent` in `env_name`."""
    num_episodes = 0
    # set up environment
    env = gym.make(env_name)
    state = torch.tensor(env.reset())
    # set up video recorder (disabled when no output path is given)
    video_recorder = VideoRecorder(env, video_path,
                                   enabled=video_path is not None)
def reset_recorder(self, record_path: Path):
    """Create and return a VideoRecorder writing under `record_path`."""
    record_path.mkdir(parents=True, exist_ok=True)
    print(f'Recording video to {record_path}.mp4')
    recorder = VideoRecorder(
        env=self,
        base_path=str(record_path),
        enabled=True,
    )
    # NOTE(review): this registers with a freshly constructed Closer
    # instance — verify this is the intended (global) registry.
    closer.Closer().register(recorder)
    return recorder
def _reset_video_recorder(self):
    """Called at the start of each episode (by _reset).

    Always creates a video recorder if one does not already exist. When a
    video recorder is already present, it will only create a new one if
    `self.single_video == False`."""
    existing = self.video_recorder
    if existing is not None and not self.single_video:
        # One video per episode: finalize the previous recorder first.
        existing.close()
        self.video_recorder = None
    if self.video_recorder is None:
        # No recorder open — start one for the current episode.
        base = osp.join(self.directory, "video.{:06}".format(self.episode_id))
        self.video_recorder = VideoRecorder(
            env=self.env,
            base_path=base,
            metadata={"episode_id": self.episode_id},
        )
def sample(self, horizon, policy, record_fname=None):
    """Samples a rollout from the agent.

    Arguments:
        horizon: (int) The length of the rollout to generate from the agent.
        policy: (policy) The policy that the agent will use for actions.
        record_fname: (str/None) The name of the file to which a recording
            of the rollout will be saved. If None, the rollout will not be
            recorded.

    Returns: (dict) A dictionary containing data from the rollout. The
        keys of the dictionary are 'obs', 'ac', and 'reward_sum'.
    """
    recording = record_fname is not None
    recorder = VideoRecorder(self.env, record_fname) if recording else None
    act_times, rewards = [], []
    observations = [self.env.reset()]
    actions = []
    reward_sum = 0
    done = False
    policy.reset()
    for t in range(horizon):
        if recording:
            recorder.capture_frame()
        start = time.time()
        actions.append(policy.act(observations[t], t))
        act_times.append(time.time() - start)
        if self.noise_stddev is None:
            obs, reward, done, info = self.env.step(actions[t])
        else:
            # Perturb the action with Gaussian noise, clipped to the
            # action-space bounds.
            noisy = actions[t] + np.random.normal(
                loc=0, scale=self.noise_stddev, size=[self.dU])
            noisy = np.minimum(
                np.maximum(noisy, self.env.action_space.low),
                self.env.action_space.high)
            obs, reward, done, info = self.env.step(noisy)
        observations.append(obs)
        reward_sum += reward
        rewards.append(reward)
        if done:
            break
    if recording:
        recorder.capture_frame()
        recorder.close()
    print("Average action selection time: ", np.mean(act_times))
    print("Rollout length: ", len(actions))
    return {
        "obs": np.array(observations),
        "ac": np.array(actions),
        "reward_sum": reward_sum,
        "rewards": np.array(rewards),
    }
def agent_sample(env, horizon, policy, record_fname):
    """Run `policy` for `horizon` steps on the Jaco env, optionally recording.

    Actions are applied as deltas added to the first 9 observation entries
    (presumably joint positions — verify against the env).
    """
    solution = None
    video_record = record_fname is not None
    recorder = None if not video_record else VideoRecorder(env, record_fname)
    times, rewards = [], []
    O, A, reward_sum, done = [env.reset()], [], 0, False
    policy.reset()
    for t in range(horizon):
        if video_record:
            recorder.capture_frame()
        # Time how long the policy takes to select an action.
        start = time.time()
        solution = policy.act(O[t], t)
        A.append(solution)
        times.append(time.time() - start)
        # === Do action on real jaco ===
        # move_jaco_real(A[t] + O[t][0:9])
        print("ac: ", A[t] + O[t][0:9])
        obs, reward, done, info = env.step(A[t] + O[t][0:9])
        print("obs: ", obs)
        print("reward: ", reward)
        # === Get obs from real jaco ===
        # angles = get_jaco_angles()
        # obs_angles = real_to_sim(angles)
        # O[t][0:9] = obs_angles
        # === sync ===
        # move_mojuco_to_real(env)
        # Display the rendered frame; pressing 'q' quits the rollout.
        screen = env.render(mode='rgb_array')
        cv2.imshow('image', cv2.cvtColor(screen, cv2.COLOR_BGR2RGB))
        if (cv2.waitKey(25) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break
        O.append(obs)
        reward_sum += reward
        rewards.append(reward)
        if done:
            break
    # === stop ===
    # NOTE(review): interactive debugger breakpoint left in — remove
    # before unattended runs.
    ipdb.set_trace()
    if video_record:
        recorder.capture_frame()
        recorder.close()
def _record_video(env, agent, output_path):
    """Records a video of an agent playing a gaming simulation.

    Args:
        env: The environment for the agent to act in.
        agent: An RL agent created by _create_agent.
        output_path (str): The directory path of where to save the recording.
    """
    recorder = VideoRecorder(env, RECORDING_NAME)
    _play(agent, env, False, recorder=recorder)
    recorder.close()
    env.close()
    # Check if output directory is google cloud and save there if so.
    if not output_path.startswith("gs://"):
        return
    bucket_name, blob_path = output_path[5:].split("/", 1)
    bucket = storage.Client().get_bucket(bucket_name)
    blob = bucket.blob(blob_path + RECORDING_NAME)
    blob.upload_from_filename(RECORDING_NAME)
def play_with_buffer(env, pi, num_episodes=3, video_path=None):
    """Play episodes with policy `pi` through a replay buffer, recording video.

    :param env: gym-style environment.
    :param pi: policy callable; also carries the buffer config (`pi.config`).
    :param num_episodes: number of episodes to play.
    :param video_path: output video path; defaults to
        'videos/REINFORCE_best.mp4' when not supplied.
    """
    # Fix: the `video_path` argument was unconditionally overwritten with a
    # hard-coded path; only fall back to the default when none was given.
    if video_path is None:
        video_path = 'videos/REINFORCE_best.mp4'
    video_recorder = VideoRecorder(env, video_path,
                                   enabled=video_path is not None)
    rep = ReplayMemory(pi.config)
    # Selects the 1st and 3rd components of the 4-dim state vector.
    obs_mask = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
    for e_i in range(num_episodes):
        s = env.reset()
        z = np.matmul(obs_mask, s)
        rep.add(z, 0, 0)
        done = False
        while not done:
            a = int(pi(rep.getState()))
            s_prime, r_t, done, _ = env.step(a)
            z_prime = np.matmul(obs_mask, s_prime)
            rep.add(z_prime, r_t, a)
            video_recorder.capture_frame()
    video_recorder.close()
    env.close()
class VideoLogger(Callback):
    """Callback that records environment frames to .mp4 files during training."""

    def __init__(self, env: MultiagentVecEnv, save_folder: str):
        super(VideoLogger, self).__init__()
        self.env = env
        self.save_folder = save_folder
        # Create the destination folder and open the first recorder.
        os.makedirs(save_folder, exist_ok=True)
        self.recorder = VideoRecorder(env, path=f'{save_folder}/0.mp4')

    def before_step(self,
                    logs: Optional[dict] = None,
                    actions: Optional[Dict[str, torch.Tensor]] = None,
                    action_distributions: Optional[Dict[str, Distribution]] = None):
        """Capture one frame before every environment step."""
        self.recorder.capture_frame()

    def after_train(self,
                    logs: Optional[dict] = None,
                    obs: Optional[Dict[str, torch.Tensor]] = None,
                    rewards: Optional[Dict[str, torch.Tensor]] = None,
                    dones: Optional[Dict[str, torch.Tensor]] = None,
                    infos: Optional[Dict[str, torch.Tensor]] = None):
        # With a single env each episode goes to its own file; with many
        # envs the whole run is saved as one video at the end.
        if self.env.num_envs != 1:
            return
        if logs['env_done']:
            # Save video and make a new recorder
            self.recorder.close()
            self.recorder = VideoRecorder(
                self.env, path=f'{self.save_folder}/{logs["episodes"]}.mp4')

    def on_train_end(self):
        """Finalize whichever recorder is currently open."""
        self.recorder.close()
def run_policy(env, get_action, env_params_list, max_ep_len=None, episode_id=0,
               record=False, recording_path=None, no_render=False,
               use_baselines=False):
    """Run one episode of `get_action` in `env`, optionally recording a video.

    :param env: environment (possibly a baselines wrapper when
        use_baselines is True).
    :param get_action: callable mapping an observation to an action.
    :param env_params_list: per-episode environment parameter dicts.
    :param max_ep_len: optional hard cap on episode length.
    :param episode_id: index into env_params_list and suffix of the video name.
    :param record: when True, save a video of the episode.
    :param recording_path: base path (without extension) for the video.
    :param no_render: suppress on-screen rendering when not recording.
    :param use_baselines: toggle the baselines-specific env/reset/info API.
    :return: the episode return (sum of rewards).
    """
    if record:
        if os.name == "nt":
            # Windows limits path length; trim the base name so the final
            # absolute path (plus suffix) stays under the limit.
            full_path = os.path.join(pathlib.Path().absolute(), recording_path)
            full_path_len = len(full_path)
            nb_char_to_remove = full_path_len - 245
            if nb_char_to_remove > 0:
                recording_path = recording_path[:-nb_char_to_remove]
        video_recorder = VideoRecorder(env,
                                       recording_path + "_ep" + str(episode_id) + ".mp4",
                                       enabled=True)
    # Configure the environment with this episode's parameters.
    if use_baselines:
        env.get_raw_env().set_environment(**env_params_list[episode_id])
    else:
        env.set_environment(**env_params_list[episode_id])
    if use_baselines:
        _, o = env.reset()
    else:
        o = env.reset()
    r, d, ep_ret, ep_len, n = 0, False, 0, 0, 0
    while True:
        if record and video_recorder.enabled:
            video_recorder.capture_frame()
        if not record and not no_render:
            env.render()
            time.sleep(1e-3)
        a = get_action(o)
        o, r, d, i = env.step(a)
        if use_baselines:
            # The baselines wrapper reports the reward under
            # info[0]["original_reward"].
            ep_ret += i[0]["original_reward"][0]
        else:
            ep_ret += r
        ep_len += 1
        if d or (ep_len == max_ep_len):
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode_id, ep_ret, ep_len))
            if record and video_recorder.enabled:
                video_recorder.close()
                video_recorder.enabled = False
            break
    return ep_ret