Example #1
    def reset_video_recorder(self):
        # Close any existing video recorder
        if self.video_recorder:
            self._close_video_recorder()

        # Reset env rendering callback
        if self._video_enabled() and hasattr(self.env.unwrapped,
                                             'set_rendering_callback'):
            self.env.unwrapped.set_rendering_callback(lambda: None)

        # Start recording the next video.
        self.video_recorder = video_recorder.VideoRecorder(
            env=self.env,
            base_path=os.path.join(
                self.directory,
                '{}.video.{}.video{:06}'.format(self.file_prefix,
                                                self.file_infix,
                                                self.episode_id)),
            metadata={'episode_id': self.episode_id},
            enabled=self._video_enabled(),
        )

        # Instead of capturing a single frame here, let the environment push
        # every rendered frame to the recorder while a step is in progress.
        if self._video_enabled() and hasattr(self.env.unwrapped,
                                             'set_rendering_callback'):
            self.env.unwrapped.set_rendering_callback(
                self.video_recorder.capture_frame)
Example #2
    def step(self, action1):

        action0 = self.policy[0].act(stochastic=True,
                                     observation=self.observation[0])[0]
        action = (action0, np.asarray(action1))
        self.observation, reward, done, infos = self.env.step(action)

        if done:
            # Episode over: finalize the current file and start a fresh
            # recorder for the next episode's video.
            self.vr.close()
            self.vr.enabled = False
            self.now += 1
            self.vr = video_recorder.VideoRecorder(
                self.env,
                "./videos/" + str(self.now) + ".mp4",
                enabled=True)

            print("Agent 1", reward[0], "Agent 2", reward[1])

            if reward[0] >= 900:
                self.c1 += 1
            elif reward[1] >= 900:
                self.c2 += 1
            print(self.c1, self.c2, self.c1 + self.c2)

        return self.observation[1], reward[1] / 2000, done, {}
Example #3
    def reset_video_recorder(self):
        # Close any existing video recorder
        if self.video_recorder:
            self._close_video_recorder()

        # Start recording the next video.
        #
        # TODO: calculate a more correct 'episode_id' upon merge
        metadata = {
            'episode_id': self.episode_id,
            'video.frames_per_second': 10,
        }
        self.video_recorder = video_recorder.VideoRecorder(
            env=self.env,
            # base_path=os.path.join(self.directory, '{}.video.{}.video{:06}'.format(self.file_prefix, self.file_infix, self.episode_id)),
            base_path=os.path.join(
                self.directory, '{}.video.{}.floor{:02}.seed{:02}'.format(
                    self.file_prefix, self.file_infix, self.get_floor,
                    self.get_seed)),
            # metadata={'episode_id': self.episode_id, 'video.frames_per_second': 10},
            # enabled=self._video_enabled(),
            metadata=metadata,
            enabled=True,
        )
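        # Setting frames_per_sec before the first capture_frame() makes the
        # lazily created encoder use 10 FPS.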
        self.video_recorder.frames_per_sec = 10
        self.video_recorder.capture_frame()
Example #4
def ResetPoseExample(steps):
    """An example that the minitaur stands still using the reset pose."""

    environment = pybullet_sim_gym_env.PyBulletSimGymEnv(
        pybullet_sim_factory=boxstack_pybullet_sim,
        debug_visualization=False,
        render=True,
        action_repeat=30)
    action = [math.pi / 2] * 8

    vid = video_recorder.VideoRecorder(env=environment, path="vid.mp4")
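    # `path` is the full output filename; for image-based render modes it
    # must end in .mp4 (VideoRecorder validates the extension).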

    for step in range(steps):
        print(step)
        startsim = time.time()
        _, _, done, _ = environment.step(action)
        stopsim = time.time()
        startrender = time.time()
        #environment.render(mode='rgb_array')
        vid.capture_frame()
        stoprender = time.time()
        print("env.step ", (stopsim - startsim))
        print("env.render ", (stoprender - startrender))
        if done:
            environment.reset()
Example #5
 def create_video_recorder(self):
     self.base_path = os.path.join('/tmp', '{}.video{}'.format(self.env_id, self.episode_id))
     self.video_recorder = video_recorder.VideoRecorder(
         env=self.env,
         base_path=self.base_path,
         metadata={'episode_id': self.episode_id},
         enabled=self.video_callable(self.episode_id)
     )
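     # Assumed follow-up elsewhere in this wrapper (not shown here):
     # capture_frame() during the episode, then close() before the next
     # recorder is created.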
Example #6
 def __init__(self, gnet, opt, global_ep, global_ep_r, res_queue, name):
     super(Worker, self).__init__()
     self.name = 'w%i' % name
     self.g_ep, self.g_ep_r, self.res_queue = global_ep, global_ep_r, res_queue
     self.gnet, self.opt = gnet, opt
     self.lnet = Net(N_S, N_A)  # local network
     self.env = gym.make('Pendulum-v0').unwrapped
     self.vid = video_recorder.VideoRecorder(
         self.env, path="../recording/con-a3c-%i.mp4" % name)
Example #7
    def start_video_recorder(self):
        self.close_video_recorder()

        video_name = f"{self.name_prefix}-step-{self.step_id}-to-step-{self.step_id + self.video_length}"
        base_path = os.path.join(self.video_folder, video_name)
        self.video_recorder = video_recorder.VideoRecorder(
            env=self.env, base_path=base_path, metadata={"step_id": self.step_id}
        )

        self.video_recorder.capture_frame()
        self.recorded_frames = 1
        self.recording = True
Example #8
    def start_video_recorder(self):
        self.close_video_recorder()

        base_path = os.path.join(self.directory, self.video_name)
        self.video_recorder = video_recorder.VideoRecorder(
            env=self.venv,
            base_path=base_path,
            metadata={'step_id': self.step_id})

        self.video_recorder.capture_frame()
        self.recorded_frames = 1
        self.recording = True
Example #9
    def start_video_recorder(self):
        self.close_video_recorder()

        base_path = os.path.join(self.directory, '{}.video.{}.video{:06}'.format(self.file_prefix, self.file_infix, self.step_id))
        self.video_recorder = video_recorder.VideoRecorder(
                env=self.venv,
                base_path=base_path,
                metadata={'step_id': self.step_id}
                )

        self.video_recorder.capture_frame()
        self.recorded_frames = 1
        self.recording = True
Example #10
    def start_video_recorder(self):
        self.close_video_recorder()

        video_name = '{}-step-{}-to-step-{}'.format(
            self.name_prefix, self.step_id, self.step_id + self.video_length)
        base_path = os.path.join(self.video_folder, video_name)
        self.video_recorder = video_recorder.VideoRecorder(
            env=self.env,
            base_path=base_path,
            metadata={'step_id': self.step_id})

        self.video_recorder.capture_frame()
        self.recorded_frames = 1
        self.recording = True
Example #11
    def reset_video_recorder(self):
        # Close any existing video recorder
        if self.video_recorder:
            self._close_video_recorder()

        # Start recording the next video.
        ep_id = '{:09}'.format(self.episode_id)
        self.video_recorder = video_recorder.VideoRecorder(
            env=self.env,
            base_path=os.path.join(self.directory,
                                   f'{self.file_prefix}.video.episode{ep_id}'),
            metadata={'episode_id': self.episode_id},
            enabled=self._video_enabled(),
        )
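        # Capture the first frame of the new episode right away.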
        self.video_recorder.capture_frame()
Example #12
    def reset_video_recorder(self):
        # Close any existing video recorder
        if self.video_recorder:
            self._close_video_recorder()

        # Start recording the next video.
        #
        # TODO: calculate a more correct 'episode_id' upon merge
        self.video_recorder = video_recorder.VideoRecorder(
            env=self.env,
            base_path=os.path.join(self.directory, '{}.video.{}.video{:06}'.format(self.file_prefix, self.file_infix, self.episode_id)),
            metadata={'episode_id': self.episode_id},
            enabled=self._video_enabled(),
        )
        self.video_recorder.capture_frame()
Example #13
    def reset_video_recorder(self):
        # Close any existing video recorder
        if self.video_recorder:
            self._close_video_recorder()

        self.video_recorder = video_recorder.VideoRecorder(
            env=self.env,
            base_path=os.path.join(
                self.directory,
                '{}.video.{}.video{:06}'.format(self.file_prefix,
                                                self.file_infix,
                                                self.episode_id)),
            metadata={'episode_id': self.episode_id},
            enabled=self.video_flg,
        )
        self.video_recorder.capture_frame()
Example #14
 def start_video_recorder(self):
     """Start video recorder"""
     # Close the recorder if already currently recording
     self.close_video_recorder()
     # Define video recording's path
     vid = "{}_step{}_to_step{}".format(self.prefix, self.step_id,
                                        self.step_id + self.video_length)
     path = osp.join(self.save_dir, vid)
     # Define video recorder
     self.video_recorder = video_recorder.VideoRecorder(
         env=self.env, base_path=path, metadata={'step_id': self.step_id})
     # Render and add a frame to the video
     self.video_recorder.capture_frame()
     # Update running statistics
     self.num_recorded_frames = 1
     self.recording = True
Example #15
    def start_video_recorder(self):
        self.close_video_recorder()

        base_path = os.path.join(
            self.directory, '{:04}-{:04}'.format(self.epoch_id, self.cycle_id))
        self.video_recorder = video_recorder.VideoRecorder(
            env=self.venv,
            base_path=base_path,
            metadata={'epoch': self.epoch_id, 'cycle': self.cycle_id})

        self.video_recorder.capture_frame()
        self.recorded_frames = 0
        self.recording = True
Example #16
    def start_video_recorder(self):
        self.close_video_recorder()

        # If video name already exists, apply unique numbered suffix.
        path = self.video_folder / (self.name_prefix + '.mp4')
        if path.exists():
            for i in itertools.count():
                path = self.video_folder / (self.name_prefix + f'_{i}.mp4')
                if not path.exists():
                    break

        self.video_recorder = video_recorder.VideoRecorder(
            env=self.env, path=str(path), metadata={'step_id': self.step_id})
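        # Unlike most examples here, this passes `path` (a complete filename)
        # rather than `base_path`; VideoRecorder accepts either argument.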

        self.video_recorder.capture_frame()
        self.recorded_frames = 1
        self.recording = True
Example #17
    def start_video_recorder(self):
        self.close_video_recorder()

        video_name = f"{self.name_prefix}-step-{self.step_id}"
        if self.episode_trigger:
            video_name = f"{self.name_prefix}-episode-{self.episode_id}"

        base_path = os.path.join(self.video_folder, video_name)
        self.video_recorder = video_recorder.VideoRecorder(
            env=self.env,
            base_path=base_path,
            metadata={"step_id": self.step_id, "episode_id": self.episode_id},
        )

        self.video_recorder.capture_frame()
        self.recorded_frames = 1
        self.recording = True
Example #18
    def reset(self):
        if self._new_video_every_reset:
            if self._recorder is not None:
                self._recorder.close()

            self._counter += 1
            if self._base_name is not None:
                self._vid_name = os.path.join(
                    self._base_path,
                    self._base_name + "_" + str(self._counter))
            else:
                self._vid_name = self._base_path + "_" + str(self._counter)

            self._recorder = video_recorder.VideoRecorder(
                self.env, path=self._vid_name + ".mp4")

        return self.env.reset()
Example #19
    def __init__(self):

        self.metadata = {'render.modes': ['human']}
        super(Env, self).__init__()
        self.env = gym.make("multicomp/KickAndDefend-v0")
        self.now = 0
        self.vr = video_recorder.VideoRecorder(
            self.env,
            "./videos/" + str(self.now) + ".mp4",
            enabled=True)
        policy_type = "lstm"

        config = argparse.Namespace(
            env='kick-and-defend',
            max_episodes=1000000,
            param_paths=[
                'agent-zoo/kick-and-defend/kicker/agent1_parameters-v2.pkl',
                'agent-zoo/kick-and-defend/defender/agent2_parameters-v1.pkl'
            ])

        param_paths = config.param_paths
        tf_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                                   intra_op_parallelism_threads=1)
        sess = tf.Session(config=tf_config)
        sess.__enter__()
        self.c1 = 0
        self.c2 = 0
        self.policy = []
        self.policy.append(
            LSTMPolicy(scope="policy0",
                       reuse=False,
                       ob_space=self.env.observation_space.spaces[0],
                       ac_space=self.env.action_space.spaces[0],
                       hiddens=[128, 128],
                       normalize=True))
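        # Initialize all TF variables, then overwrite the policy weights with
        # the pretrained parameters loaded below.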
        sess.run(tf.variables_initializer(tf.global_variables()))
        params = [load_from_file(param_pkl_path=path) for path in param_paths]

        setFromFlat(self.policy[0].get_variables(), params[0])

        self.action_space = spaces.Box(low=-1, high=1, shape=(17, ))

        self.observation_space = spaces.Box(low=0, high=1, shape=(1, 384))
Example #20
def test_ppo(env_id, seed, path_to_policy_params, n_envs = 1):
    
    """
     env_id: typr str, identifies each environment uniquely
     num_timesteps: number of timesteps to run the algorithm
     seed: initial random seed
     policy: policy to be followed (mlp, cnn, lstm, etc)
     n_env: number of envs to run in parallel
     nminibatches: number of minibatches of mini batch gradient descent (first-order optimization) to update the policy params
     n_steps: number of steps in each update
    """
    
    # Train PPO algorithm for num_timesteps
    # stack 4 frames for the vectorized environment
    # Note: PPO2 works only with vectorized environment
    env = VecFrameStack(make_atari_env(env_id=env_id, num_env=n_envs, seed=seed), 4)
    # define the policy
    # create model object for class PPO2
    # The policy is CnnPolicy from stable baselines and has been trained for 2e7 time steps on Pong
    
    model = PPO2.load(path_to_policy_params)
    vr = video_recorder.VideoRecorder(env, base_path="./videos/Pong_test_without_attack", enabled=True)
    
    obs = env.reset()
    ep_rew = [0.0]
    ep = 0
    for i in range(50000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        ep_rew[-1] += rewards[0]  # single env in the VecEnv, so index 0
        env.render()
        vr.capture_frame()
        if dones[0]:
            obs = env.reset()
            print('Net reward for episode ', ep, ': ', ep_rew[-1])
            if (ep + 1) % 10 == 0:
                print('Mean reward for last 10 episodes: ', np.mean(ep_rew[-10:]))
            ep_rew.append(0.0)
            ep += 1
            print('Number of timesteps completed: ', i + 1)
    env.close()
    vr.close()
Example #21
    def _reset_video_recorder(self) -> None:
        """Creates a video recorder if one does not already exist.

        Called at the start of each episode (by `reset`). When a video recorder is
        already present, it will only create a new one if `self.single_video == False`.
        """
        if self.video_recorder is not None:
            # Video recorder already started.
            if not self.single_video:
                # We want a new video for each episode, so destroy current recorder.
                self.video_recorder.close()
                self.video_recorder = None

        if self.video_recorder is None:
            # No video recorder -- start a new one.
            self.video_recorder = video_recorder.VideoRecorder(
                env=self.env,
                base_path=os.path.join(
                    self.directory, "video.{:06}".format(self.episode_id)
                ),
                metadata={"episode_id": self.episode_id},
            )
Example #22
    def __init__(self,
                 env,
                 base_path,
                 base_name=None,
                 new_video_every_reset=False):
        super(VideoWrapper, self).__init__(env)

        self._base_path = base_path
        self._base_name = base_name

        self._new_video_every_reset = new_video_every_reset
        if self._new_video_every_reset:
            self._counter = 0
            self._recorder = None
        else:
            if self._base_name is not None:
                self._vid_name = os.path.join(self._base_path, self._base_name)
            else:
                self._vid_name = self._base_path
            self._recorder = video_recorder.VideoRecorder(
                self.env, path=self._vid_name + ".mp4")
Example #23
                rew_mean.append(mean)
                rew_var.append(var)
                max_reward = np.max(rewards_over_time)
                episode_max = np.argmax(rewards_over_time)
                if ep_rew_total >= 300:
                    w = w + 1
                    agent.save(s_link)
                                        
                print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
                print("Episode: ", i)
                print("Time: ", np.round(time_space, 2),"secs")
                print("Reward:", ep_rew_total)
                print("Maximum Reward: " + str(max_reward) + "  on Episode: " + str(episode_max))
                print("Times win: " + str(w))
                if i % 100 == 0:
                    vids = video_recorder.VideoRecorder(env, path=(str(i) + '.mp4'))
                    env.unwrapped.render()
                    vids.capture_frame()
                    print("Mean reward of the past 100 episodes: ", str(np.mean(rewards_over_time[-100:])))
                    mean_100.append(np.mean(rewards_over_time[-100:]))
                    f = open('results.txt', 'a')
                    f.write('\n' + str(np.mean(rewards_over_time[-100:])))
                    f.close()
                    vids.close()

                
                # Start training the Neural Network
                hist, mm = agent.TRAIN(BATCH)
                

                epsilon.append(agent.e)
Example #24
 def StartRecording(self, video_filename: t.Text):
     """Starts to record a new animation; requires plot=True."""
     self._recorder = video_recorder.VideoRecorder(self._gym_env,
                                                   video_filename,
                                                   enabled=True)
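     # `video_filename` becomes the recorder's output path, so it should end
     # in .mp4.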
Example #25
from torch.utils.tensorboard import SummaryWriter

from rlberry.agents.torch.dqn import DQNAgent
from rlberry.envs import gym_make
from rlberry.utils.logging import configure_logging

from gym.wrappers.monitoring import video_recorder

configure_logging(level="INFO")

env = gym_make("CartPole-v0")
agent = DQNAgent(env, epsilon_decay_interval=1000)
agent.set_writer(SummaryWriter())

print(f"Running DQN on {env}")

agent.fit(budget=50)
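# Record the trained agent; one recorder spans all evaluation episodes below.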
vid = video_recorder.VideoRecorder(
    env,
    path="_video/video_plot_dqn.mp4",
    enabled=True,
)

for episode in range(3):
    done = False
    state = env.reset()
    while not done:
        action = agent.policy(state)
        state, reward, done, _ = env.step(action)
        vid.capture_frame()
vid.close()
env.close()
Example #26
parser.add_argument('--record_video',
                    default=False,
                    action='store_true')
args = parser.parse_args()
np.set_printoptions(precision=3)

# enable dynamic GPU memory allocation
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Initialize OpenAI Procgen environment
env = gym.make("procgen:procgen-starpilot-v0",
               num_levels=0,
               start_level=0,
               distribution_mode="easy")
if args.record_video:
    video = video_recorder.VideoRecorder(
        env, base_path='../data/videos/recent_video')
    video.frames_per_sec = 60
    args.total_steps = args.batch_size  # only run one recorded batch
else:
    video = None

# Initialize Arrays
sim_steps = 0
rewards_record = {}
rewards_record['means'] = np.array([])
rewards_record['stds'] = np.array([])
rewards_record['max'] = np.array([])

# Initialize streamlit objects
st.header('Visualize Current Simulation Batch')
st.subheader('Training Batch Progress')
Example #27
from pyvirtualdisplay import Display
from gym.wrappers.monitoring import video_recorder

d = Display()
d.start()

# Recording filename
video_name = "./vid/Task2-2.mp4"

# Setup the environment for the maze
env = gym.make("maze-sample-10x10-v0")

# Setup the video
vid = video_recorder.VideoRecorder(env, video_name)

# env = gym.wrappers.Monitor(env,'./vid',force=True)
current_state = env.reset()

"""# Evaluation of Epsilon"""

# For testing Epsilon I fix the gamma at 1 and learning rate (alpha) at 0.1

states_dic = {}  # dictionary to keep the states/coordinates of the Q table
count = 0
for i in range(10):
    for j in range(10):
        states_dic[i, j] = count
        count += 1