def replay_save_video(env_id, policy, path_vids):
    """Replay `policy` in `env_id` for one episode and record it to `path_vids`.

    Loads the per-environment config to obtain the controller and episode
    length, rolls the policy out, prints the nan-safe return and closes the
    recorder.
    """
    if env_id == 'HalfCheetah-v2':
        from configs import cheetah_config
        nb_bootstrap, nb_explorations, nb_tests, nb_timesteps, offline_eval, controller, representer, \
            nb_rep, engineer_goal, goal_space, initial_space, knn, noise, nb_weights = cheetah_config()
    elif env_id == 'MountainCarContinuous-v0':
        # BUG FIX: cmc_config was referenced without being imported, which
        # raised NameError as soon as this branch was taken.
        from configs import cmc_config
        nb_bootstrap, nb_explorations, nb_tests, nb_timesteps, offline_eval, controller, representer, \
            nb_rep, engineer_goal, goal_space, initial_space, knn, noise, nb_weights = cmc_config()
    else:
        # Fail fast with a clear message instead of a NameError on
        # `controller` / `nb_timesteps` further down.
        raise ValueError('Unsupported env_id: {}'.format(env_id))
    env = gym.make(env_id)
    vid_env = VideoRecorder(env=env, path=path_vids)
    obs = env.reset()
    rew = np.zeros([nb_timesteps + 1])
    done = False
    for t in range(nb_timesteps):
        if done:
            break
        act = controller.step(policy, obs).reshape(1, -1)
        out = env.step(np.copy(act))
        env.render()
        # NOTE(review): frame capture is commented out, so the saved video
        # will be empty — confirm whether this is intentional.
        # vid_env.capture_frame()
        obs = out[0]
        rew[t + 1] = out[1]
        done = out[2]
    print('Run performance: ', np.nansum(rew))
    vid_env.close()
def play(env, model, video_path, num_episodes, timesteps, metadata):
    """Run `model` in `env` for `num_episodes`, recording a video and traces.

    Returns the list of first-observation components (``theta``) collected
    over all steps of all episodes.
    """
    # BUG FIX: theta / theta_dot / actions were appended to without ever
    # being defined, which raises NameError unless identically-named
    # globals happened to exist elsewhere in the module.
    theta = []
    theta_dot = []
    actions = []
    video_recorder = None
    for i_episodes in range(num_episodes):
        video_recorder = VideoRecorder(
            env=env, path=video_path, metadata=metadata,
            enabled=video_path is not None)
        obs = env.reset()
        for t in range(timesteps):
            # Model expects a nested batch shape around the observation.
            obs = [np.array([[list(obs)]])]
            video_recorder.capture_frame()
            action = model.predict(obs)[0]
            obs, rew, done, info = env.step(action)
            env.render()
            theta.append(obs[0])
            theta_dot.append(obs[1])
            actions.append(action[0])
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                # NOTE(review): incrementing num_episodes here does not
                # extend the already-evaluated range() — confirm intent.
                num_episodes += 1
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
                break
    env.close()
    return theta
def testCartpole(model, test=True, record=True):
    """Evaluate a saved DQN checkpoint on CartPole, optionally recording
    the first episode to ./video/. Runs episodes indefinitely."""
    env = gym.make(ENV_NAME)
    if record:
        rec = VideoRecorder(env, "./video/iteration_%s.mp4" % str(model))
    obs_dim = env.observation_space.shape[0]
    n_actions = env.action_space.n
    solver = DQNSolver(obs_dim, n_actions,
                       load_path="./artifacts/iteration_%s/" % str(model),
                       test=test)
    episode = 0
    while True:
        current = np.reshape(env.reset(), [1, obs_dim])
        tick = 0
        episode += 1
        while True:
            tick += 1
            env.render()
            if record:
                rec.capture_frame()
            chosen = solver.act(current)
            successor, _reward, terminal, _info = env.step(chosen)
            current = np.reshape(successor, [1, obs_dim])
            if terminal:
                print("Run: " + str(episode) + ", score: " + str(tick))
                if record:
                    # Stop recording after the first finished episode.
                    rec.close()
                    record = False
                break
def perform_debug_sequence(sequence_name, env, walker_type, sequence_of_actions,
                           action_repeat=10, has_gravity=False):
    """Replay a fixed debug action sequence in `env` and record it.

    Each action in `sequence_of_actions` is repeated `action_repeat` times;
    gravity is toggled via `has_gravity`. The video is written under
    debug_folder/<walker_type>/<sequence_name>.mp4.
    """
    # BUG FIX: paths were built with hard-coded "\\" separators, which only
    # works on Windows; os.path.join is portable.
    video_folder = os.path.join(debug_folder, walker_type)
    if not os.path.exists(video_folder):
        os.makedirs(video_folder)
    if not has_gravity:
        env.world.gravity = (0, 0)
    else:
        env.world.gravity = (0, -10)
    video_recorder = VideoRecorder(
        env,
        os.path.join(video_folder, sequence_name + ".mp4"),
    )
    # Stump Tracks
    env.reset()
    for action in sequence_of_actions:
        for i in range(action_repeat):
            _, _, d, _ = env.step(action)
            video_recorder.capture_frame()
            time.sleep(0.01)
    # Capture one final frame after the last step.
    video_recorder.capture_frame()
    video_recorder.close()
def play_episode(episode=0):
    """Play one episode, record it to video_dir, and sort the resulting clip
    into the module-level vid_success / vid_failure lists by outcome."""
    video_fn = 'episode_' + str(episode) + '.mp4'
    video_path = os.path.join(video_dir, video_fn)
    video_recorder = VideoRecorder(env, video_path)
    length = 0
    obs = env.reset()
    done = False
    while not done and length < max_episode_length:
        # NOTE(review): env.step() is called without an action — presumably
        # this custom env steps its own agents internally; confirm.
        observation, reward, done, _ = env.step()
        video_recorder.capture_frame()
        # Obtain waypoints
        # NOTE(review): waypoints is rebuilt each step but never used.
        waypoints = []
        for entity in env.get_team_blue.tolist() + env.get_team_red.tolist():
            waypoints.extend(entity.get_loc())
        length += 1
    # Close the recorder before loading the clip from disk.
    video_recorder.close()
    vid = mp.VideoFileClip(video_path)
    # Check if episode has right length played
    if length <= min_length or length >= max_length:
        return
    # Post Processing: keep the clip only while the quota is unfilled.
    if env.blue_win and len(vid_success) < num_success:
        vid_success.append(vid)
    elif env.red_win and len(vid_failure) < num_failure:
        vid_failure.append(vid)
def start(env, agent: AgentBase):
    """Train/evaluate `agent` in `env` for the module-level `episodes` count,
    recording video every `video_frequency` episodes and plotting scores."""
    global video_recorder
    scores = []
    total_steps = 0
    video_recorder = None
    video_enabled = True
    video_ext = determine_extension(env)
    if not video_ext:
        # No supported renderer for this env — disable recording entirely.
        video_enabled = False
    total_start_time = time.time()
    for episode in range(1, episodes):
        episode_start_time = time.time()
        if (episode % video_frequency) == 0:
            video_recorder = VideoRecorder(env, video_dir + "/{}{}".format(episode, video_ext), enabled=video_enabled)
        score, steps = run_episode(env, agent, video_recorder)
        scores.append(score)
        total_steps += steps
        # NOTE(review): this condition uses (episode + 1) while the video
        # condition above uses episode — confirm the off-by-one is intended.
        if episode_log_frequency > 0 and (episode + 1) % episode_log_frequency == 0:
            log_episode_summary(episodes, episode, score, steps, time.time() - episode_start_time, time.time() - total_start_time)
        if episode % plot_frequency == 0:
            plot([i for i in range(episode)], scores)
        # Close and release the recorder after the episode that used it.
        if video_recorder:
            video_recorder.close()
            video_recorder = None
def test_no_frames():
    """A recorder closed without capturing any frame stays functional,
    reports empty, and leaves no file on disk."""
    broken_env = BrokenRecordableEnv()
    recorder = VideoRecorder(broken_env)
    recorder.close()
    assert recorder.empty
    assert recorder.functional
    assert not os.path.exists(recorder.path)
def record_rollout(env, horizon, policy, record_file_path):
    """
    This is the recording function for the runner class which samples one
    episode with a specified length using the provided policy and records it
    in a video.

    Parameters
    ---------
    env: gym.Env
        The environment to roll the policy out in and record from.
    horizon: Int
        The task horizon/ episode length.
    policy: ModelBasedBasePolicy or ModelFreeBasePolicy
        The policy to be used in collecting the episodes from the
        different agents.
    record_file_path: String
        specified the file path to save the video that will be recorded in.
    """
    recorder = VideoRecorder(env, record_file_path + '.mp4')
    observations = env.reset()
    for t in range(horizon):
        recorder.capture_frame()
        # Model-based policies also return their predictions; only the
        # action is executed here.
        if not isinstance(policy, ModelFreeBasePolicy):
            action_to_execute, expected_obs, expected_reward = policy.act(
                observations, t)
        else:
            action_to_execute = policy.act(observations, t)
        observations, reward, done, info = env.step(action_to_execute)
    # Capture the final post-step frame before closing.
    recorder.capture_frame()
    recorder.close()
    return
class VideoLogger(Callback):
    """Training callback that records the vectorised env to mp4 files.

    With a single env, each finished episode gets its own numbered file;
    with multiple envs, one continuous video is written for the whole run.
    """

    def __init__(self, env: MultiagentVecEnv, save_folder: str):
        super(VideoLogger, self).__init__()
        self.env = env
        self.save_folder = save_folder
        os.makedirs(save_folder, exist_ok=True)
        # First recording starts immediately as <save_folder>/0.mp4.
        self.recorder = VideoRecorder(env, path=f'{save_folder}/0.mp4')

    def before_step(self,
                    logs: Optional[dict] = None,
                    actions: Optional[Dict[str, torch.Tensor]] = None,
                    action_distributions: Optional[Dict[str, Distribution]] = None):
        # Capture the env state before every step.
        self.recorder.capture_frame()

    def after_train(self,
                    logs: Optional[dict] = None,
                    obs: Optional[Dict[str, torch.Tensor]] = None,
                    rewards: Optional[Dict[str, torch.Tensor]] = None,
                    dones: Optional[Dict[str, torch.Tensor]] = None,
                    infos: Optional[Dict[str, torch.Tensor]] = None):
        # If there is just one env save each episode to a different file
        # Otherwise save the whole video at the end
        if self.env.num_envs == 1:
            if logs['env_done']:
                # Save video and make a new recorder
                self.recorder.close()
                self.recorder = VideoRecorder(
                    self.env, path=f'{self.save_folder}/{logs["episodes"]}.mp4')

    def on_train_end(self):
        # Flush whatever is still buffered (the multi-env whole-run video,
        # or the last partial episode in the single-env case).
        self.recorder.close()
class RecordedEnv(gym.Wrapper):
    """Gym wrapper that can record a single episode to an mp4 on demand.

    Recording is requested per-step via step(..., record_episode=True) on
    the first step of an episode; the recorder is discarded when the
    episode ends or the env is closed.
    """

    def __init__(self, env):
        super().__init__(env)
        self.start_of_episode = None

    def reset(self):
        self.start_of_episode = True
        return self.env.reset()

    def step(self, action, filename='', record_episode=False):
        # Lazily create the recorder on the first step we are asked to record.
        if record_episode and self.start_of_episode:
            self.recorder = VideoRecorder(self.env, path=filename)
            self.start_of_episode = False
        if hasattr(self, 'recorder'):
            self.recorder.capture_frame()
        next_state, reward, done, info = self.env.step(action)
        if done and hasattr(self, 'recorder'):
            # close and save video at end of episode
            self.recorder.close()
            del self.recorder
        return next_state, reward, done, info

    def close(self):
        if hasattr(self, 'recorder'):
            # Grab one last frame, then flush and drop the recorder.
            self.recorder.capture_frame()
            self.recorder.close()
            del self.recorder
        self.start_of_episode = True
        return self.env.close()
def mutate(self, weights, record):
    """
    Mutate the inputted weights and evaluate its performance against the
    weights of the previous generation.
    """
    # Recorder is only created when a video was requested.
    recorder = VideoRecorder(self.env, path=self.video_path) if record else None
    self.elite.set_weights(weights)
    self.oponent.set_weights(weights)
    # Perturb the opponent's copy of the weights in place.
    perturbations = self.oponent.mutate(self.config['mutation_power'])
    # Play both seatings so the result is symmetric w.r.t. side advantage.
    _, oponent_reward1, ts1 = self.play_game(self.elite, self.oponent, recorder=recorder)
    oponent_reward2, _, ts2 = self.play_game(self.oponent, self.elite, recorder=recorder)
    if record:
        recorder.close()
    return {
        'total_reward': np.mean([oponent_reward1, oponent_reward2]),
        'timesteps_total': ts1 + ts2,
        'video': None if not record else wandb.Video(self.video_path),
        'noise': perturbations
    }
def play(self):
    """Roll out one greedy episode, rendering live and recording it under
    the log directory; prints the final score."""
    from gym.wrappers.monitoring.video_recorder import VideoRecorder
    rec = VideoRecorder(self.env, base_path=os.path.join(
        self.log_dir, self.log_dir.rsplit('/', 1)[1]))
    obs = self.env.reset()
    total_reward = 0
    finished = False
    while not finished:
        action, q_vals = self.act(np.asarray(obs)[np.newaxis])
        successor, step_reward, finished, _ = self.env.step(action)
        total_reward += step_reward
        obs = successor
        self.env.render()
        rec.capture_frame()
        # Slow the loop down so the rendered playback is watchable.
        time.sleep(0.05)
    print('Game ended with score: ', total_reward)
    self.env.close()
    rec.close()
def record_video(env, policy, file_name, number_of_resets=1, max_time_steps=None):
    """
    Records a video of a policy for a specified environment

    :param env: (causal_world.CausalWorld) the environment to use for recording.
    :param policy: the policy to be evaluated
    :param file_name: (str) full path where the video is being stored.
    :param number_of_resets: (int) the number of resets/episodes to be viewed
    :param max_time_steps: (int) the maximum number of time steps per episode
    :return:
    """
    recorder = VideoRecorder(env, "{}.mp4".format(file_name))
    for reset_idx in range(number_of_resets):
        policy.reset()
        obs = env.reset()
        recorder.capture_frame()
        if max_time_steps is not None:
            # Fixed-length rollout. NOTE(review): this branch does not stop
            # on `done`, so stepping may continue past episode termination —
            # confirm that is intended.
            for i in range(max_time_steps):
                desired_action = policy.act(obs)
                obs, reward, done, info = env.step(action=desired_action)
                recorder.capture_frame()
        else:
            # Open-ended rollout: run until the env signals termination.
            while True:
                desired_action = policy.act(obs)
                obs, reward, done, info = env.step(action=desired_action)
                recorder.capture_frame()
                if done:
                    break
    recorder.close()
    return
def record_video_of_episode(episode, file_name,
                            env_wrappers=np.array([]),
                            env_wrappers_args=np.array([])):
    """
    Records a video of a logged episode for a specified environment

    :param episode: (Episode) the logged episode
    :param file_name: (str) full path where the video is being stored.
    :param env_wrappers: (list) a list of gym wrappers
    :param env_wrappers_args: (list) a list of kwargs for the gym wrappers
    :return:
    """
    actual_skip_frame = episode.world_params["skip_frame"]
    env = get_world(episode.task_name,
                    episode.task_params,
                    episode.world_params,
                    enable_visualization=False,
                    env_wrappers=env_wrappers,
                    env_wrappers_args=env_wrappers_args)
    env.set_starting_state(episode.initial_full_state, check_bounds=False)
    recorder = VideoRecorder(env, "{}.mp4".format(file_name))
    recorder.capture_frame()
    # FIX: the original loop bound the unused timestamp to a variable named
    # `time`, shadowing the `time` module; only the action is needed here.
    for _timestamp, _observation, _reward, action in zip(episode.timestamps,
                                                         episode.observations,
                                                         episode.rewards,
                                                         episode.robot_actions):
        # Each logged action was held for `skip_frame` simulator steps.
        for _ in range(actual_skip_frame):
            env.step(action)
            recorder.capture_frame()
    recorder.close()
    env.close()
def play_with_buffer(env, pi, num_episodes=3, video_path=None):
    """Play `num_episodes` episodes with policy `pi`, feeding masked
    observations through a replay buffer, and record them to `video_path`.

    :param env: gym-style environment.
    :param pi: policy callable taking the buffer state and returning an action.
    :param num_episodes: number of episodes to play.
    :param video_path: output mp4 path; defaults to 'videos/REINFORCE_best.mp4'.
    """
    # BUG FIX: the video_path argument was unconditionally overwritten by
    # the hard-coded default, so callers could never choose the output file.
    if video_path is None:
        video_path = 'videos/REINFORCE_best.mp4'
    video_recorder = VideoRecorder(env, video_path, enabled=video_path is not None)
    rep = ReplayMemory(pi.config)
    # Mask selects positions (indices 0 and 2) from the 4-dim observation.
    obs_mask = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
    for e_i in range(num_episodes):
        s = env.reset()
        z = np.matmul(obs_mask, s)
        rep.add(z, 0, 0)
        # env.unwrapped.render()
        done = False
        while not done:
            a = int(pi(rep.getState()))
            s_prime, r_t, done, _ = env.step(a)
            z_prime = np.matmul(obs_mask, s_prime)
            rep.add(z_prime, r_t, a)
            video_recorder.capture_frame()
            # env.render()
    video_recorder.close()
    # video_recorder.enabled = False
    env.close()
def demoRecordPickAndPlaceObstacle(
        self, raw_path="videos/KukaPickAndPlaceObstacle"):
    """Record a demo of the pick-and-place-with-obstacle task over five
    fixed goals, writing the video to `raw_path`.

    :param raw_path: base path (without extension) for the recorded video.
    """
    env = self.env
    # Fixed goal positions used for the five demo rollouts.
    goals = [[0.80948876, -0.24847823, 0.85],
             [0.90204398, -0.24176245, 0.85],
             [0.72934716, -0.19637749, 0.85],
             [0.6970663, -0.25643907, 0.85],
             [0.7029464, -0.18765762, 0.85]]
    recorder = VideoRecorder(env.env.env, base_path=raw_path)
    acc_sum, obs = 0.0, []
    # FIX: test_rollouts was assigned twice with the same value; keep one.
    test_rollouts = 5
    for i in range(test_rollouts):
        env.reset()
        env.set_goal(np.array(goals[i]))
        obs.append(goal_based_process(env.get_obs()))
        print("Rollout {}/{} ...".format(i + 1, test_rollouts))
        for timestep in range(200):
            actions = self.my_step_batch(obs)
            obs, infos = [], []
            ob, _, _, info = env.step(actions[0])
            obs.append(goal_based_process(ob))
            infos.append(info)
            recorder.capture_frame()
    recorder.close()
def play_episode(frame_count, episode = 0): """ play episode and render it into .gif """ # Set video recorder video_dir = os.path.join(data_dir, 'raw_videos') if not os.path.exists(data_dir): os.mkdir(data_dir) if not os.path.exists(video_dir): os.mkdir(video_dir) video_fn = 'episode_' + str(episode) + '.mp4' video_path = os.path.join(video_dir, video_fn) video_recorder = VideoRecorder(env, video_path) # Reset environmnet observation = env.reset() # Rollout episode episode_length = 0. done = 0 while (done == 0): # set exploration rate for this frame video_recorder.capture_frame() episode_length += 1 observation, reward, done, _ = env.step() # stop the episode if it goes too long if episode_length >= max_episode_length: reward = -100. done = True # Closer video_recorder.close() vid = mp.VideoFileClip(video_path) success_flag = env.blue_win survival_rate = sum([agent.isAlive for agent in env.get_team_blue]) / len(env.get_team_blue) kill_rate = sum([not agent.isAlive for agent in env.get_team_red]) / len(env.get_team_red) if success_flag == 1 and len(vid_success) < num_success: vid_success.append(vid) success_episode_num.append(episode) elif success_flag == 0 and len(vid_failure) < num_failure: vid_failure.append(vid) failure_episode_num.append(episode) # rendering vid to .gif video_dir = os.path.join(data_dir, 'gif_videos') if not os.path.exists(video_dir): os.mkdir(video_dir) video_fn = 'episode_' + str(episode) + '.gif' video_path = os.path.join(video_dir, video_fn) vid.write_gif(video_path, fps=500) return episode_length, reward, frame_count + episode_length, survival_rate, kill_rate, success_flag
def test_record_breaking_render_method():
    """Capturing from an env whose render() raises marks the recorder as
    broken and empty, and the partial output file is cleaned up."""
    broken_env = BrokenRecordableEnv()
    recorder = VideoRecorder(broken_env)
    recorder.capture_frame()
    recorder.close()
    assert recorder.empty
    assert recorder.broken
    assert not os.path.exists(recorder.path)
def test_text_envs():
    """A text-rendered env (FrozenLake) can be captured without error."""
    text_env = gym.make("FrozenLake-v1")
    recorder = VideoRecorder(text_env)
    try:
        text_env.reset()
        recorder.capture_frame()
        recorder.close()
    finally:
        # Always delete the temp output, even if capture/close failed.
        os.remove(recorder.path)
def test(self, gamesNum=100, adversary=None, advDetector=None, render=False,
         verbose=True, videoPath=None):
    """ Test trained DQN agent.

    Plays `gamesNum` games, optionally perturbing frames with `adversary`
    and feeding them to `advDetector`, rendering and/or recording video.
    Returns (gameRewards, gameLengths, attacksNumbers, advDetector).
    """
    recordVideo = videoPath is not None
    if recordVideo:
        recorder = VideoRecorder(self.env, videoPath)
    gameRewards = []
    gameLengths = []
    attacksNumbers = []
    for i in range(gamesNum):
        done = False
        s = utils.preprocess(self.env.reset())
        # Stack the first frame 4 times to form the initial frame stack.
        frames = np.expand_dims(np.repeat(s, 4, 2), 0)
        gameReward = 0.0
        gameLength = 0
        attNum = 0
        while not done:
            actionScores, actionProbs = self.sess.run(
                [self.logits, self.probs], feed_dict={self.inputs: frames})
            # Optionally craft adversarial frames; isAdvState counts attacks.
            isAdvState, advFrames = self._attack(adversary, frames, actionProbs)
            if advDetector is not None:
                advDetector.isAdv(advFrames, isAdvState)
            attNum += isAdvState
            # Repeat the greedy action for frameSkip environment steps.
            for j in range(self.frameSkip):
                sj, r, done, _ = self.env.step(np.argmax(actionScores))
                gameReward += r
                gameLength += 1
                if render:
                    self.env.render()
                if recordVideo:
                    recorder.capture_frame()
            # Push only the last skipped frame into the stack.
            frames = utils.pushframe(frames, utils.preprocess(sj))
        gameRewards.append(gameReward)
        gameLengths.append(gameLength)
        attacksNumbers.append(attNum)
        if verbose:
            print("Finished test game " + str(i + 1) + " / " + str(gamesNum) +
                  " reward = " + str(gameReward))
            print('{"metric": "loss", "value":' + str(gameReward) + '}')
    print("Agent achieved average reward of " + str(np.mean(gameRewards)) +
          " in " + str(gamesNum) + " games.")
    print('{"metric": "loss", "value":' + str(np.mean(gameRewards)) + '}')
    if recordVideo:
        recorder.close()
    return gameRewards, gameLengths, attacksNumbers, advDetector
def run_policy(env, get_action, env_params_list, max_ep_len=None, episode_id=0,
               record=False, recording_path=None, no_render=False,
               use_baselines=False):
    """Run one episode of `get_action` in `env`, configured from
    env_params_list[episode_id]; optionally record or render. Returns the
    episode return."""
    if record:
        # Windows caps absolute paths (~260 chars); trim the recording path
        # so the final file name stays under the limit.
        if os.name == "nt":
            full_path = os.path.join(pathlib.Path().absolute(), recording_path)
            full_path_len = len(full_path)
            nb_char_to_remove = full_path_len - 245
            if nb_char_to_remove > 0:
                recording_path = recording_path[:-nb_char_to_remove]
        video_recorder = VideoRecorder(env,
                                       recording_path + "_ep" + str(episode_id) + ".mp4",
                                       enabled=True)
    # Baselines-wrapped envs expose the raw env for parameter setting.
    if use_baselines:
        env.get_raw_env().set_environment(**env_params_list[episode_id])
    else:
        env.set_environment(**env_params_list[episode_id])
    if use_baselines:
        _, o = env.reset()
    else:
        o = env.reset()
    r, d, ep_ret, ep_len, n = 0, False, 0, 0, 0
    while True:
        if record and video_recorder.enabled:
            video_recorder.capture_frame()
        if not record and not no_render:
            env.render()
            time.sleep(1e-3)
        a = get_action(o)
        o, r, d, i = env.step(a)
        # Baselines wrappers report the unnormalised reward via info.
        if use_baselines:
            ep_ret += i[0]["original_reward"][0]
        else:
            ep_ret += r
        ep_len += 1
        if d or (ep_len == max_ep_len):
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode_id, ep_ret, ep_len))
            if record and video_recorder.enabled:
                video_recorder.close()
                video_recorder.enabled = False
            break
    return ep_ret
def test_record_simple():
    """A normal env records a non-empty, non-broken video file."""
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    # FIX: the original opened the binary mp4 in text mode and leaked the
    # file handle; os.path.getsize checks the size without opening it.
    assert os.path.getsize(rec.path) > 100
def play_episode(frame_count, episode=0):
    """Play one episode with the blue policy, record it, and keep the clip
    in vid_success/vid_failure by outcome. Returns episode statistics."""
    # Set video recorder
    video_dir = os.path.join(data_dir, 'raw_videos')
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
    if not os.path.exists(video_dir):
        os.mkdir(video_dir)
    video_fn = 'episode_' + str(episode) + '.mp4'
    video_path = os.path.join(video_dir, video_fn)
    video_recorder = VideoRecorder(env, video_path)
    # Reset environmnet
    observation = env.reset()
    # Rollout episode
    episode_length = 0.
    done = 0
    while (done == 0):
        # set exploration rate for this frame
        video_recorder.capture_frame()
        episode_length += 1
        # state consists of the centered observations of each agent
        action = policy_blue.gen_action(env.get_team_blue, env._env)  # Full observability
        observation, reward, done, _ = env.step(action)
        # stop the episode if it goes too long
        if episode_length >= max_episode_length:
            reward = -100.
            done = True
    # Post Statistics
    success_flag = env.blue_win
    # Fraction of blue agents alive / red agents killed at episode end.
    survival_rate = sum([agent.isAlive for agent in env.get_team_blue]) / len(
        env.get_team_blue)
    kill_rate = sum([not agent.isAlive for agent in env.get_team_red]) / len(env.get_team_red)
    # Closer
    video_recorder.close()
    vid = mp.VideoFileClip(video_path)
    # Keep clips only while the success/failure quotas are unfilled.
    if success_flag == 1 and len(vid_success) < num_success:
        vid_success.append(vid)
    elif success_flag == 0 and len(vid_failure) < num_failure:
        vid_failure.append(vid)
    return episode_length, reward, frame_count + episode_length, survival_rate, kill_rate, success_flag
def rollout(env, policy, max_steps=1000, action_noise=0.0, render_dir=None):
    """Run one rollout and return data.

    Args:
      env: The environment with reset and step function.
      policy: The state->action policy to roll out.
      max_steps: The maximum number of steps in the episode.
      action_noise: The probability of adding noise to the action before
        sending to the environment. Action noise does not get logged in
        rollout data.
      render_dir: Optional base path; when given, the rollout is recorded
        to video there.

    Returns:
      A dict of torch tensors, with time dimension for 'obs', 'act'
      'rew' and 'done'.
    """
    rollout_data = {
        'obs': [],
        'act': [],
        'rew': [],
        'done': [],
        'total_rew': 0.0,
        'num_steps': 0,
    }
    # Initialize collection.
    obs = env.reset()
    done = False
    if render_dir is not None:
        video_recorder = VideoRecorder(env, base_path=render_dir)
    while not done and rollout_data['num_steps'] < max_steps:
        if render_dir is not None:
            video_recorder.capture_frame()
        rollout_data['num_steps'] += 1
        rollout_data['obs'].append(obs)
        act = policy(obs)
        # Note: the logged action is the pre-noise one.
        rollout_data['act'].append(act)
        if action_noise > 0.0:
            if np.random.uniform() < action_noise:
                act += env.action_space.sample()
        obs, rew, done, _ = env.step(act)
        rollout_data['rew'].append(rew)
        rollout_data['done'].append(done)
    if render_dir is not None:
        video_recorder.close()
    # Convert leaves to tensors, then stack the per-step lists along time.
    rollout_data = utils.tree_apply(torch.tensor, rollout_data)
    for k in ['obs', 'act', 'rew', 'done']:
        rollout_data[k] = torch.stack(rollout_data[k])
    return rollout_data
class RecordMonitor(Thread):
    """Thread-based wrapper around a VideoRecorder.

    NOTE(review): no run() is defined, so start() does nothing useful;
    frames are captured only via explicit get_rec() calls — confirm intent.
    """

    def __init__(self, env, monitor_path):
        Thread.__init__(self)
        # The starting time
        self.rec = VideoRecorder(env, path=monitor_path)

    def reset_timer(self):
        # Record when monitoring (re)started.
        self.start_time = time.time()

    def get_rec(self):
        # Capturing at a lower layer makes the control timing inaccurate!
        self.rec.capture_frame()

    def stop(self):
        # Flush and finalize the video file.
        self.rec.close()
def evaluate(self, weights):
    """Evaluate `weights` by playing one recorded game against a random
    policy; returns reward, timesteps and the wandb video handle."""
    recorder = VideoRecorder(self.env, path=self.video_path_eval)
    self.elite.set_weights(weights)
    random_opponent = RandomPolicy(self.config['number_actions'])
    reward, _, ts = self.play_game(self.elite, random_opponent,
                                   recorder=recorder, eval=True)
    recorder.close()
    result = {
        'total_reward': reward,
        'timesteps_total': ts,
        'video': wandb.Video(self.video_path_eval),
    }
    return result
def life():
    """Generator yielding ('moment'/'env', value) events over one lifetime,
    optionally recording each moment to video when `record` is truthy."""
    yield 'moment', 'born'
    try:
        from gym.wrappers.monitoring.video_recorder import VideoRecorder
        with habitat() as env:
            if record:
                # `record` doubles as the video output path.
                recorder = VideoRecorder(env, record)
            yield 'env', env
            life = yield_(live(env)(individual))
            for moment in life:
                if record:
                    recorder.capture_frame()
                yield 'moment', moment
            if record:
                recorder.close()
            return life.value
    except AssertionError:
        # Life could not proceed; fall back to a contribution instead.
        self = yield from contribution()
        return self
def _predict(self, model, video_path):
    """Run predictions on trained RL model and record 1000 steps to video.

    :param model: trained model exposing ``predict(obs)``.
    :param video_path: directory into which ``rl_out.mp4`` is written.
    """
    # FIX: the original format string received a second argument (the MPI
    # rank) that the template never used; it has been removed.
    vr = VideoRecorder(env=self._env,
                       path="{}/rl_out.mp4".format(video_path),
                       enabled=True)
    obs = self._env.reset()
    for i in range(1000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = self._env.step(action)
        if dones:
            obs = self._env.reset()
        self._env.render(mode='rgb_array')
        vr.capture_frame()
    vr.close()
    self._env.close()
def test(self, agent):
    """Run three rendered evaluation episodes with `agent`, each recorded
    to its own ppo_<name>_<model>_<i>.mp4 file."""
    config = self._config
    for i in range(3):
        video_path = 'ppo_{0}_{1}_{2:d}.mp4'.format(config.name, config.model, i)
        video_recorder = VideoRecorder(self._env, video_path, enabled=video_path is not None, fps=15)
        # Batch the initial observation and move it to the configured device.
        state0 = torch.tensor(self._env.reset(), dtype=torch.float32).unsqueeze(0).to(config.device)
        done = False
        while not done:
            self._env.render()
            video_recorder.capture_frame()
            # Deterministic (greedy) action selection during testing.
            action0 = agent.get_action(state0, True)
            next_state, reward, done, info = self._env.step(action0.item())
            state0 = torch.tensor(next_state, dtype=torch.float32).unsqueeze(0).to(config.device)
        video_recorder.close()
def _run_episode(self, step, render=False):
    """Run one hierarchical (high/low-level) evaluation episode.

    The high-level actor proposes a goal every `c` steps; the low-level
    actor acts conditioned on (state, goal). Returns (done, ep_reward),
    where done indicates the target was reached.
    """
    target = self.target.detach().cpu().numpy()
    t, ep_reward = 0, 0
    env = self.env
    if render:
        video_path = self.log_dir / 'video'
        video_path.mkdir(exist_ok=True, parents=True)
        video_recorder = VideoRecorder(
            env=self.env,
            base_path=str(video_path / f'{step}'),
            metadata={'step': step},
            enabled=True,
        )
    else:
        video_recorder = None
    state, done = env.reset(), False
    # extra variables for proper execution (initial value not important)
    goal = None
    while not done and t < self.ep_len:
        if video_recorder:
            video_recorder.capture_frame()
        state_tens = torch.from_numpy(state).float().to(self.device)
        # TODO: run episodes exactly like we collect experience
        # Re-sample a high-level goal every c-th step (and at t == 0).
        if t == 0 or (t + 1) % self.params.c == 0:
            goal = self.agent_hi.actor(state_tens).squeeze(0)
        action = self.agent_lo.actor(torch.cat([state_tens, goal], dim=-1)).squeeze(0)
        next_state, _, _, _ = env.step(action.detach().cpu().numpy())
        # Env reward/done are ignored; reward and termination are computed
        # from distance to the fixed target instead.
        reward = dense_reward(next_state, target, self.goal_dim)
        done = success_judge(next_state, target, self.goal_dim)
        next_state_tens = torch.from_numpy(next_state).float().to(self.device)
        t += 1
        ep_reward += reward
        # Goal transition: shift the goal by the state delta (HIRO-style
        # relabelling on the first goal_dim components).
        goal = goal + (state_tens - next_state_tens)[:self.goal_dim]
        state = next_state
    if video_recorder:
        video_recorder.close()
    return done, ep_reward