def reset_video_recorder(self):
    """Close any running recorder and start a fresh one for the next episode."""
    # Tear down the previous recorder, if any.
    if self.video_recorder:
        self._close_video_recorder()

    env_unwrapped = self.env.unwrapped
    has_callback = hasattr(env_unwrapped, 'set_rendering_callback')

    # Detach the old rendering callback before swapping recorders.
    if self._video_enabled() and has_callback:
        env_unwrapped.set_rendering_callback(lambda: None)

    # Begin recording the next video.
    video_path = os.path.join(
        self.directory,
        '{}.video.{}.video{:06}'.format(self.file_prefix, self.file_infix,
                                        self.episode_id))
    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=video_path,
        metadata={'episode_id': self.episode_id},
        enabled=self._video_enabled(),
    )

    # Instead of capturing just one frame per step, let the environment push
    # every rendered frame while a step is ongoing.
    if self._video_enabled() and has_callback:
        env_unwrapped.set_rendering_callback(self.video_recorder.capture_frame)
def step(self, action1):
    """Advance the two-player env: agent 0 acts via the frozen policy,
    agent 1 via `action1`.

    Returns:
        (observation_for_agent1, reward_for_agent1 / 2000, done, {}).
    """
    # Opponent (agent 0) action comes from the pretrained policy.
    action0 = self.policy[0].act(stochastic=True,
                                 observation=self.observation[0])[0]
    action = (action0, np.asarray(action1))
    self.observation, reward, done, infos = self.env.step(action)

    if done:
        # Finish the current clip and open a new one for the next episode.
        self.vr.close()
        self.vr.enabled = False
        self.now += 1
        video_path = "./videos/" + str(self.now) + ".mp4"
        # BUGFIX: the original `enabled=<path concat> is not None` compared a
        # string concatenation to None, which is always True -- say so directly.
        self.vr = video_recorder.VideoRecorder(self.env, video_path,
                                               enabled=True)

    print("Agent 1", reward[0], "Agent 2", reward[1])
    # Tally decisive wins (per-agent reward >= 900).
    if reward[0] >= 900:
        self.c1 += 1
    elif reward[1] >= 900:
        self.c2 += 1
    print(self.c1, self.c2, self.c1 + self.c2)
    return self.observation[1], reward[1] / 2000, done, {}
def reset_video_recorder(self):
    """Shut down any active recorder and begin a new per-floor/seed video."""
    if self.video_recorder:
        self._close_video_recorder()

    # TODO: calculate a more correct 'episode_id' upon merge
    meta = {'episode_id': self.episode_id, 'video.frames_per_second': 10}
    base = os.path.join(
        self.directory,
        '{}.video.{}.floor{:02}.seed{:02}'.format(
            self.file_prefix, self.file_infix, self.get_floor, self.get_seed))

    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=base,
        metadata=meta,
        enabled=True,
    )
    # Force a 10 fps output and grab the first frame right away.
    self.video_recorder.frames_per_sec = 10
    self.video_recorder.capture_frame()
def ResetPoseExample(steps):
    """An example that the minitaur stands still using the reset pose.

    Args:
      steps: number of simulation steps to run.

    Records every frame to "vid.mp4" and closes the recorder when done
    (the original leaked the recorder, so the file was never finalized).
    """
    environment = pybullet_sim_gym_env.PyBulletSimGymEnv(
        pybullet_sim_factory=boxstack_pybullet_sim,
        debug_visualization=False,
        render=True,
        action_repeat=30)
    action = [math.pi / 2] * 8
    vid = video_recorder.VideoRecorder(env=environment, path="vid.mp4")
    try:
        for step_idx in range(steps):
            print(step_idx)
            startsim = time.time()
            _, _, done, _ = environment.step(action)
            stopsim = time.time()

            startrender = time.time()
            # environment.render(mode='rgb_array')
            vid.capture_frame()
            stoprender = time.time()

            print("env.step ", (stopsim - startsim))
            print("env.render ", (stoprender - startrender))
            if done:
                environment.reset()
    finally:
        # BUGFIX: flush and finalize the video file even if stepping raises.
        vid.close()
def create_video_recorder(self):
    """Build a recorder writing under /tmp; enabled per the episode callable."""
    file_name = '{}.video{}'.format(self.env_id, self.episode_id)
    self.base_path = os.path.join('/tmp', file_name)
    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=self.base_path,
        metadata={'episode_id': self.episode_id},
        enabled=self.video_callable(self.episode_id))
def __init__(self, gnet, opt, global_ep, global_ep_r, res_queue, name):
    """Worker process holding a local copy of the global A3C network."""
    super(Worker, self).__init__()
    self.name = 'w%i' % name
    # Shared bookkeeping: episode counter, running reward, result queue.
    self.g_ep = global_ep
    self.g_ep_r = global_ep_r
    self.res_queue = res_queue
    self.gnet = gnet
    self.opt = opt
    self.lnet = Net(N_S, N_A)  # local network
    self.env = gym.make('Pendulum-v0').unwrapped
    self.vid = video_recorder.VideoRecorder(
        self.env, path="../recording/con-a3c-%i.mp4" % name)
def start_video_recorder(self):
    """Close any running recorder, then begin a clip covering the next
    window of `video_length` steps."""
    self.close_video_recorder()

    clip_name = (
        f"{self.name_prefix}-step-{self.step_id}"
        f"-to-step-{self.step_id + self.video_length}"
    )
    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=os.path.join(self.video_folder, clip_name),
        metadata={"step_id": self.step_id},
    )

    # The first frame is captured immediately.
    self.video_recorder.capture_frame()
    self.recorded_frames = 1
    self.recording = True
def start_video_recorder(self):
    """Start a fresh recorder on the vectorized env."""
    self.close_video_recorder()

    self.video_recorder = video_recorder.VideoRecorder(
        env=self.venv,
        base_path=os.path.join(self.directory, self.video_name),
        metadata={'step_id': self.step_id})

    # Grab the first frame right away so the clip is never empty.
    self.video_recorder.capture_frame()
    self.recorded_frames = 1
    self.recording = True
def start_video_recorder(self):
    """Begin recording; the file name encodes prefix, infix and step id."""
    self.close_video_recorder()

    file_name = '{}.video.{}.video{:06}'.format(
        self.file_prefix, self.file_infix, self.step_id)
    self.video_recorder = video_recorder.VideoRecorder(
        env=self.venv,
        base_path=os.path.join(self.directory, file_name),
        metadata={'step_id': self.step_id}
    )
    self.video_recorder.capture_frame()
    self.recorded_frames = 1
    self.recording = True
def start_video_recorder(self):
    """Open a recorder for the window [step_id, step_id + video_length]."""
    self.close_video_recorder()

    start, stop = self.step_id, self.step_id + self.video_length
    clip = '{}-step-{}-to-step-{}'.format(self.name_prefix, start, stop)
    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=os.path.join(self.video_folder, clip),
        metadata={'step_id': self.step_id})

    self.video_recorder.capture_frame()
    self.recorded_frames = 1
    self.recording = True
def reset_video_recorder(self):
    """Recycle the recorder at an episode boundary."""
    # Shut down the previous episode's recorder, if one exists.
    if self.video_recorder:
        self._close_video_recorder()

    # Start recording the next video; episode id is zero-padded to 9 digits.
    padded_id = '{:09}'.format(self.episode_id)
    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=os.path.join(
            self.directory, f'{self.file_prefix}.video.episode{padded_id}'),
        metadata={'episode_id': self.episode_id},
        enabled=self._video_enabled(),
    )
    self.video_recorder.capture_frame()
def reset_video_recorder(self):
    """Close the current recorder (if any) and start one for the next episode."""
    if self.video_recorder:
        self._close_video_recorder()

    # TODO: calculate a more correct 'episode_id' upon merge
    target = os.path.join(
        self.directory,
        '{}.video.{}.video{:06}'.format(
            self.file_prefix, self.file_infix, self.episode_id))
    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=target,
        metadata={'episode_id': self.episode_id},
        enabled=self._video_enabled(),
    )
    self.video_recorder.capture_frame()
def reset_video_recorder(self):
    """Restart recording; enabled-ness is controlled by `self.video_flg`."""
    if self.video_recorder:
        self._close_video_recorder()

    out_path = os.path.join(
        self.directory,
        '{}.video.{}.video{:06}'.format(
            self.file_prefix, self.file_infix, self.episode_id))
    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=out_path,
        metadata={'episode_id': self.episode_id},
        enabled=self.video_flg,
    )
    self.video_recorder.capture_frame()
def start_video_recorder(self):
    """Start video recorder"""
    # Stop any recorder that is still running.
    self.close_video_recorder()

    # Clip path: <prefix>_step<start>_to_step<end> under the save dir.
    vid = "{}_step{}_to_step{}".format(
        self.prefix, self.step_id, self.step_id + self.video_length)
    full_path = osp.join(self.save_dir, vid)

    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=full_path,
        metadata={'step_id': self.step_id})

    # Capture the first frame and reset the running statistics.
    self.video_recorder.capture_frame()
    self.num_recorded_frames = 1
    self.recording = True
def start_video_recorder(self):
    """Begin a recorder for the current (epoch, cycle) pair.

    The output file is named '<epoch:04>-<cycle:04>' under `self.directory`.
    """
    self.close_video_recorder()
    base_path = os.path.join(
        self.directory, '{:04}-{:04}'.format(self.epoch_id, self.cycle_id))
    self.video_recorder = video_recorder.VideoRecorder(env=self.venv,
                                                       base_path=base_path,
                                                       metadata={
                                                           'epoch': self.epoch_id,
                                                           'cycle': self.cycle_id
                                                       })
    self.video_recorder.capture_frame()
    # BUGFIX: a frame was just captured, so the counter must start at 1.
    # It was 0, undercounting by one relative to the sibling recorders in
    # this file, which all set recorded_frames = 1 after capture_frame().
    self.recorded_frames = 1
    self.recording = True
def start_video_recorder(self):
    """Open a recorder, numbering the file name to avoid clobbering old clips."""
    self.close_video_recorder()

    # If video name already exists, apply unique numbered suffix.
    candidate = self.video_folder / (self.name_prefix + '.mp4')
    if candidate.exists():
        for suffix in itertools.count():
            candidate = self.video_folder / (self.name_prefix + f'_{suffix}.mp4')
            if not candidate.exists():
                break

    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env, path=str(candidate), metadata={'step_id': self.step_id})
    self.video_recorder.capture_frame()
    self.recorded_frames = 1
    self.recording = True
def start_video_recorder(self):
    """Start a clip named by episode id when episode-triggered, else by step id."""
    self.close_video_recorder()

    if self.episode_trigger:
        video_name = f"{self.name_prefix}-episode-{self.episode_id}"
    else:
        video_name = f"{self.name_prefix}-step-{self.step_id}"

    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=os.path.join(self.video_folder, video_name),
        metadata={"step_id": self.step_id, "episode_id": self.episode_id},
    )
    self.video_recorder.capture_frame()
    self.recorded_frames = 1
    self.recording = True
def reset(self):
    """Reset the wrapped env; optionally rotate to a new numbered video file."""
    if self._new_video_every_reset:
        # Finish the previous clip before starting the next numbered one.
        if self._recorder is not None:
            self._recorder.close()

        self._counter += 1
        suffix = "_" + str(self._counter)
        if self._base_name is not None:
            self._vid_name = os.path.join(self._base_path,
                                          self._base_name + suffix)
        else:
            self._vid_name = self._base_path + suffix

        self._recorder = video_recorder.VideoRecorder(
            self.env, path=self._vid_name + ".mp4")
    return self.env.reset()
def __init__(self):
    """Single-agent wrapper around the two-player KickAndDefend env.

    Agent 0 is driven by a pretrained LSTM policy loaded from disk; this
    wrapper exposes agent 1's observation/action interface.
    """
    self.metadata = {'render.modes': ['human']}
    super(Env, self).__init__()
    self.env = gym.make("multicomp/KickAndDefend-v0")
    self.now = 0
    # BUGFIX: the original `enabled=<path concat> is not None` compared a
    # string concatenation to None, which is always True -- say so directly.
    video_path = "./videos/" + str(self.now) + ".mp4"
    self.vr = video_recorder.VideoRecorder(self.env, video_path, enabled=True)

    policy_type = "lstm"
    config = argparse.Namespace(
        env='kick-and-defend',
        max_episodes=1000000,
        param_paths=[
            'agent-zoo/kick-and-defend/kicker/agent1_parameters-v2.pkl',
            'agent-zoo/kick-and-defend/defender/agent2_parameters-v1.pkl'
        ])
    param_paths = config.param_paths

    # Keep TF single-threaded; the session is entered and left open on purpose.
    tf_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                               intra_op_parallelism_threads=1)
    sess = tf.Session(config=tf_config)
    sess.__enter__()

    self.c1 = 0  # win counter, agent 1
    self.c2 = 0  # win counter, agent 2
    self.policy = []
    self.policy.append(
        LSTMPolicy(scope="policy0",
                   reuse=False,
                   ob_space=self.env.observation_space.spaces[0],
                   ac_space=self.env.action_space.spaces[0],
                   hiddens=[128, 128],
                   normalize=True))
    sess.run(tf.variables_initializer(tf.global_variables()))
    # Only params[0] is loaded here; the second path is unused by this wrapper.
    params = [load_from_file(param_pkl_path=path) for path in param_paths]
    setFromFlat(self.policy[0].get_variables(), params[0])

    self.action_space = spaces.Box(low=-1, high=1, shape=(17, ))
    self.observation_space = spaces.Box(low=0, high=1, shape=(1, 384))
def test_ppo(env_id, seed, path_to_policy_params, n_envs = 1):
    """Roll out a pretrained PPO2 policy and record the episodes to video.

    Args:
        env_id: str, identifies the Atari environment uniquely.
        seed: initial random seed.
        path_to_policy_params: path to the saved PPO2 model.
        n_envs: number of envs to run in parallel.
    """
    # PPO2 works only with vectorized environments; stack 4 frames as in training.
    env = VecFrameStack(make_atari_env(env_id=env_id, num_env=n_envs, seed=seed), 4)

    # The policy is CnnPolicy from stable baselines, pre-trained on Pong.
    model = PPO2.load(path_to_policy_params)

    # BUGFIX: the original `enabled=<string literal> is not None` was always
    # True (a literal is never None); state the intent directly.
    video_path = "./videos/Pong_test_without_attack"
    vr = video_recorder.VideoRecorder(env, base_path=video_path, enabled=True)

    obs = env.reset()
    ep_rew = [0.0]
    ep = 0
    for i in range(50000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        ep_rew[-1] += rewards
        env.render()
        vr.capture_frame()
        if dones:
            obs = env.reset()
            print('Net reward for episode ', ep, ': ', ep_rew[-1])
            if (ep + 1) % 10 == 0:
                print('Mean reward for last 10 episodes: ', np.mean(ep_rew[-10:]))
            ep_rew.append(0.0)
            ep += 1
    print('Number of timesteps completed: ', i + 1)
    env.close()
    vr.close()
def _reset_video_recorder(self) -> None:
    """Creates a video recorder if one does not already exist.

    Called at the start of each episode (by `reset`). When a video
    recorder is already present, it will only create a new one if
    `self.single_video == False`.
    """
    if self.video_recorder is not None and not self.single_video:
        # One video per episode: retire the current recorder first.
        self.video_recorder.close()
        self.video_recorder = None

    if self.video_recorder is None:
        # No active recorder -- start a fresh one for this episode.
        episode_path = os.path.join(self.directory,
                                    "video.{:06}".format(self.episode_id))
        self.video_recorder = video_recorder.VideoRecorder(
            env=self.env,
            base_path=episode_path,
            metadata={"episode_id": self.episode_id},
        )
def __init__(self, env, base_path, base_name=None, new_video_every_reset=False):
    """Wrap `env`, recording video either once or anew on every reset."""
    super(VideoWrapper, self).__init__(env)
    self._base_path = base_path
    self._base_name = base_name
    self._new_video_every_reset = new_video_every_reset

    if self._new_video_every_reset:
        # Recorder is created lazily in reset(), numbered by this counter.
        self._counter = 0
        self._recorder = None
    else:
        # Single-video mode: open the recorder immediately.
        if self._base_name is None:
            self._vid_name = self._base_path
        else:
            self._vid_name = os.path.join(self._base_path, self._base_name)
        self._recorder = video_recorder.VideoRecorder(
            self.env, path=self._vid_name + ".mp4")
# NOTE(review): this chunk sits inside a per-episode training loop whose
# header (iterating `i`) is outside this view; the indentation below is
# inferred from statement order -- confirm against the full file.
rew_mean.append(mean)
rew_var.append(var)
max_reward = np.max(rewards_over_time)
episode_max = np.argmax(rewards_over_time)

# Save the agent whenever the episode reward crosses the 300 threshold.
if ep_rew_total >= 300:
    w = w + 1
    agent.save(s_link)

print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
print("Episode: ", i)
print("Time: ", np.round(time_space, 2), "secs")
print("Reward:", ep_rew_total)
print("Maximum Reward: " + str(max_reward) + " on Episode: " + str(episode_max))
print("Times win: " + str(w))

# Every 100 episodes: record a single video frame and log the trailing mean.
if i % 100 == 0:
    vids = video_recorder.VideoRecorder(env, path=(str(i) + '.mp4'))
    env.unwrapped.render()
    vids.capture_frame()
    print("Mean reward of the past 100 episodes: ",
          str(np.mean(rewards_over_time[-100:])))
    mean_100.append(np.mean(rewards_over_time[-100:]))
    # NOTE(review): appends the trailing mean to results.txt on each logging tick.
    f = open('results.txt', 'a')
    f.write('\n' + str(np.mean(rewards_over_time[-100:])))
    f.close()
    vids.close()

# Start training the Neural Network
hist, mm = agent.TRAIN(BATCH)
epsilon.append(agent.e)
def StartRecording(self, video_filename: t.Text):
    """Starts to record a new animation; requires plot=True."""
    self._recorder = video_recorder.VideoRecorder(
        self._gym_env,
        video_filename,
        enabled=True,
    )
from torch.utils.tensorboard import SummaryWriter

from rlberry.agents.torch.dqn import DQNAgent
from rlberry.utils.logging import configure_logging

from gym.wrappers.monitoring import video_recorder

configure_logging(level="INFO")

env = gym_make("CartPole-v0")
agent = DQNAgent(env, epsilon_decay_interval=1000)
agent.set_writer(SummaryWriter())
print(f"Running DQN on {env}")
agent.fit(budget=50)

# Record three episodes with the trained policy.
vid = video_recorder.VideoRecorder(
    env,
    path="_video/video_plot_dqn.mp4",
    enabled=True,
)
for episode in range(3):
    done = False
    state = env.reset()
    while not done:
        action = agent.policy(state)
        state, reward, done, _ = env.step(action)
        vid.capture_frame()
# BUGFIX: close the recorder so the mp4 is actually encoded and flushed to
# disk -- the original never called close(), leaving the file unfinished.
vid.close()
env.close()
# NOTE(review): this span opens mid-way through a parser.add_argument(...)
# call whose start is outside this view.
default=False, action='store_true')
args = parser.parse_args()

np.set_printoptions(precision=3)

# enable dynamic GPU memory allocation
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0

# Initialize OpenAI Procgen environment
env = gym.make("procgen:procgen-starpilot-v0",
               num_levels=0,
               start_level=0,
               distribution_mode="easy")

# Optional video capture of a single batch at 60 fps.
if args.record_video:
    video = video_recorder.VideoRecorder(
        env, base_path='../data/videos/recent_video')
    video.frames_per_sec = 60
    args.total_steps = args.batch_size  # only run one recorded batch
else:
    video = None

# Initialize Arrays
sim_steps = 0
rewards_record = {}
rewards_record['means'] = np.array([])
rewards_record['stds'] = np.array([])
rewards_record['max'] = np.array([])

# Initialize streamlit objects
st.header('Visualize Current Simulation Batch')
st.subheader('Training Batch Progress')
from pyvirtualdisplay import Display
from gym.wrappers.monitoring import video_recorder

# Virtual display so the env can render headlessly.
d = Display()
d.start()

# Recording filename
video_name = "./vid/Task2-2.mp4"

# Environment and recorder for the 10x10 sample maze.
env = gym.make("maze-sample-10x10-v0")
vid = video_recorder.VideoRecorder(env, video_name)

current_state = env.reset()

"""# Evaluation of Epsilon"""

# For testing Epsilon I fix the gamma at 1 and learning rate (alpha) at 0.1

# Map each (row, col) maze coordinate to a flat Q-table index.
states_dic = {}
count = 0
for row in range(10):
    for col in range(10):
        states_dic[row, col] = count
        count += 1