Example #1
0
    def __init__(self, venv, directory, record_video_trigger, video_length=200):
        """
        Initialise the video-recording wrapper state around `venv`.

        # Arguments
            venv: VecEnv to wrap
            directory: Where to save videos. The directory (and any missing
                parent directories) is created if it does not exist yet.
            record_video_trigger:
                Function that defines when to start recording.
                The function takes the current number of step,
                and returns whether we should start recording or not.
            video_length: Length of recorded video
        """

        VecEnvWrapper.__init__(self, venv)
        self.record_video_trigger = record_video_trigger
        self.video_recorder = None

        self.directory = os.path.abspath(directory)
        # makedirs(exist_ok=True) avoids the check-then-create race of
        # `exists()` + `mkdir()` (e.g. several worker processes starting at
        # once) and also handles nested paths whose parents are missing.
        os.makedirs(self.directory, exist_ok=True)

        # Stored naming parts: a fixed prefix plus the current process id.
        self.file_prefix = "vecenv"
        self.file_infix = '{}'.format(os.getpid())
        self.step_id = 0
        self.video_length = video_length

        self.recording = False
        self.recorded_frames = 0
 def __init__(self, venv, nstack):
     """Set up the stacked observation buffer and observation space.

     # Arguments
         venv: vectorized environment to wrap
         nstack: how many times the wrapped bounds are repeated along the
             last axis
     """
     self.venv = venv
     self.nstack = nstack
     wrapped_space = venv.observation_space
     # Repeat the wrapped low/high bounds nstack times along the last axis.
     stacked_low, stacked_high = (
         np.repeat(bound, self.nstack, axis=-1)
         for bound in (wrapped_space.low, wrapped_space.high)
     )
     # One zero-initialised buffer entry per environment.
     buffer_shape = (venv.num_envs,) + stacked_low.shape
     self.stackedobs = np.zeros(buffer_shape, stacked_low.dtype)
     stacked_space = spaces.Box(
         low=stacked_low,
         high=stacked_high,
         dtype=venv.observation_space.dtype,
     )
     VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
 def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
     """Initialise normalisation state for the wrapped `venv`.

     # Arguments
         venv: vectorized environment to wrap
         ob: when truthy, create running statistics shaped like the
             observation space
         ret: accepted for interface compatibility (see note below)
         clipob: stored as `self.clipob`
         cliprew: stored as `self.cliprew`
         gamma: stored as `self.gamma`
         epsilon: stored as `self.epsilon`
     """
     VecEnvWrapper.__init__(self, venv)
     if ob:
         self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
     else:
         self.ob_rms = None
     # NOTE(review): `ret` is never read; return statistics are always
     # disabled in this variant. Confirm this is intentional.
     self.ret_rms = None
     self.clipob, self.cliprew = clipob, cliprew
     # One accumulator slot per environment.
     self.ret = np.zeros(self.num_envs)
     self.gamma, self.epsilon = gamma, epsilon
Example #4
0
    def __init__(self, venv, nstack, device=None):
        """Set up a torch-backed stacked observation buffer.

        # Arguments
            venv: vectorized environment to wrap
            nstack: how many times the wrapped bounds are repeated along
                axis 0
            device: torch device for the buffer; defaults to CPU when None
        """
        self.venv = venv
        self.nstack = nstack

        base_space = venv.observation_space
        # Size of the first axis of a single (unstacked) observation.
        self.shape_dim0 = base_space.shape[0]

        # Repeat the wrapped low/high bounds nstack times along axis 0.
        stacked_low, stacked_high = (
            np.repeat(bound, self.nstack, axis=0)
            for bound in (base_space.low, base_space.high)
        )

        target_device = torch.device('cpu') if device is None else device
        buffer_shape = (venv.num_envs, ) + stacked_low.shape
        self.stacked_obs = torch.zeros(buffer_shape).to(target_device)

        stacked_space = gym.spaces.Box(
            low=stacked_low,
            high=stacked_high,
            dtype=venv.observation_space.dtype,
        )
        VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
Example #5
0
 def __init__(self,
              venv,
              visual_obs=True,
              ret=True,
              clipob=10.,
              cliprew=10.,
              gamma=0.99,
              epsilon=1e-8):
     """Initialise normalisation state for a venv with a 'visual' sub-space.

     # Arguments
         venv: vectorized environment to wrap
         visual_obs: when truthy, create running statistics shaped like
             `observation_space.spaces['visual']`
         ret: when truthy, create scalar running statistics in `ret_rms`
         clipob: stored as `self.clipob`
         cliprew: stored as `self.cliprew`
         gamma: stored as `self.gamma`
         epsilon: stored as `self.epsilon`
     """
     VecEnvWrapper.__init__(self, venv)

     if visual_obs:
         visual_space = self.observation_space.spaces['visual']
         self.ob_rms = RunningMeanStd(shape=visual_space.shape)
     else:
         self.ob_rms = None

     if ret:
         self.ret_rms = RunningMeanStd(shape=())
     else:
         self.ret_rms = None

     self.clipob, self.cliprew = clipob, cliprew
     # One accumulator slot per environment.
     self.ret = np.zeros(self.num_envs)
     self.gamma, self.epsilon = gamma, epsilon
     self.training = True
Example #6
0
    def __init__(self,
                 venv,
                 obs=False,
                 ret=False,
                 clipob=10.,
                 cliprew=10.,
                 gamma=0.99,
                 epsilon=1e-8):
        """Initialise normalisation state; both statistics are off by default.

        # Arguments
            venv: vectorized environment to wrap
            obs: when truthy, create running statistics shaped like the
                observation space in `obs_rms`
            ret: when truthy, create scalar running statistics in `ret_rms`
            clipob: stored as `self.clipob`
            cliprew: stored as `self.cliprew`
            gamma: stored as `self.gamma`
            epsilon: stored as `self.epsilon`
        """
        VecEnvWrapper.__init__(self, venv)

        if obs:
            self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
        else:
            self.obs_rms = None

        if ret:
            self.ret_rms = RunningMeanStd(shape=())
        else:
            self.ret_rms = None

        self.clipob, self.cliprew = clipob, cliprew
        # One accumulator slot per environment.
        self.ret = np.zeros(self.num_envs)
        self.gamma, self.epsilon = gamma, epsilon
Example #7
0
    def __init__(self, venv, priors, device=None):
        """Set up an observation buffer with extra channels on axis 0.

        The wrapped observation space is read as 3-dimensional
        (`shape[0]`, `shape[1]`, `shape[2]`); the new space adds
        `self.addit_ch` entries to the first dimension, with bounds
        0 and 255.

        # Arguments
            venv: vectorized environment to wrap; must expose `nstack`,
                `num_envs` and `observation_space`
            priors: stored as `self.priors`
            device: torch device for the buffer; defaults to CPU when None
        """
        self.venv = venv
        self.nstack = venv.nstack
        self.addit_ch = 3
        self.priors = priors
        wos = venv.observation_space  # wrapped ob space
        w_shape = wos.shape
        full_shape = (w_shape[0] + self.addit_ch, w_shape[1], w_shape[2])
        low = np.zeros(full_shape)
        high = np.full(full_shape, 255)

        # Resolve the default once and use the same value for both the
        # stored attribute and the buffer. Previously `self.device` was only
        # set when `device` was None, and the buffer was moved with the
        # unresolved (possibly None) argument.
        if device is None:
            device = torch.device('cpu')
        self.device = device

        self.full_obs = torch.zeros((venv.num_envs, ) + low.shape).to(device)
        observation_space = gym.spaces.Box(low=low,
                                           high=high,
                                           dtype=venv.observation_space.dtype)
        VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Example #8
0
    def __init__(self, venv, nstack, device=None):
        """Set up a torch-backed stacked observation buffer.

        # Arguments
            venv: vectorized environment to wrap
            nstack: how many times the wrapped bounds are repeated along
                axis 0
            device: torch device for the buffer; defaults to CPU when None
        """
        self.venv = venv
        self.nstack = nstack
        # Original author's note: the wrapped space is expected to be
        # 1 x 84 x 84, making shape_dim0 equal to 1 (not checked here).
        base_space = venv.observation_space
        self.shape_dim0 = base_space.shape[0]

        # Repeat the low/high bounds nstack times along axis 0
        # (1 x 84 x 84 -> 4 x 84 x 84 in the author's example).
        stacked_low, stacked_high = (
            np.repeat(bound, self.nstack, axis=0)
            for bound in (base_space.low, base_space.high)
        )

        target_device = torch.device('cpu') if device is None else device
        # Buffer shape: (num_envs,) followed by the stacked bound shape.
        self.stacked_obs = torch.zeros(
            (venv.num_envs, ) + stacked_low.shape).to(target_device)

        stacked_space = gym.spaces.Box(
            low=stacked_low,
            high=stacked_high,
            dtype=venv.observation_space.dtype,
        )
        VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
Example #9
0
 def __init__(self,
              venv,
              ob=True,
              ret=True,
              clipob=10.,
              cliprew=10.,
              gamma=0.99,
              epsilon=1e-8):
     """Initialise normalisation state and the list of saved attribute names.

     # Arguments
         venv: vectorized environment to wrap
         ob: when truthy, create running statistics shaped like the
             observation space
         ret: when truthy, create scalar running statistics in `ret_rms`
         clipob: stored as `self.clipob`
         cliprew: stored as `self.cliprew`
         gamma: stored as `self.gamma`
         epsilon: stored as `self.epsilon`
     """
     VecEnvWrapper.__init__(self, venv)

     if ob:
         self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
     else:
         self.ob_rms = None

     if ret:
         self.ret_rms = RunningMeanStd(shape=())
     else:
         self.ret_rms = None

     self.clipob, self.cliprew = clipob, cliprew
     # One accumulator slot per environment.
     self.ret = np.zeros(self.num_envs)
     self.gamma, self.epsilon = gamma, epsilon
     # Attribute names kept in a list; presumably consumed by save/load
     # code that is not visible here.
     self.variables_name_save = ['clipob', 'cliprew', 'ret', 'gamma', 'epsilon']
Example #10
0
 def __init__(self, venv, nstack):
     """Set up frame stacking for plain or Dict observation spaces.

     When the wrapped observation space is a `gym.spaces.Dict`, only its
     'observation' entry is stacked; the other entries are left as they are.

     # Arguments
         venv: vectorized environment to wrap
         nstack: how many times the wrapped bounds are repeated along the
             last axis
     """
     self.venv = venv
     self.nstack = nstack
     self.dict_obs = False
     if isinstance(venv.observation_space, gym.spaces.Dict):
         wos = venv.observation_space.spaces['observation']
         self.dict_obs = True
     else:
         wos = venv.observation_space  # wrapped ob space
     low = np.repeat(wos.low, self.nstack, axis=-1)
     high = np.repeat(wos.high, self.nstack, axis=-1)
     self.stackedobs = np.zeros((venv.num_envs, ) + low.shape, low.dtype)
     # Take the dtype from the space actually being stacked. In the Dict
     # case `venv.observation_space` is the container, not the Box whose
     # bounds were used above; in the plain case `wos` is the same object.
     observation_space = spaces.Box(low=low, high=high, dtype=wos.dtype)
     if self.dict_obs:
         # NOTE: this replaces the 'observation' entry on the wrapped env's
         # own Dict space (in-place mutation), then reuses that Dict.
         _observation_space = venv.observation_space
         _observation_space.spaces['observation'] = observation_space
         observation_space = _observation_space
     VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Example #11
0
 def __init__(self,
              venv,
              ob=True,
              ret=True,
              clipob=10.,
              cliprew=10.,
              gamma=0.99,
              epsilon=1e-8):
     """Initialise normalisation state, with a `dict` observation-space case.

     When `self.observation_space` is a `dict`, the observation statistics
     are shaped like its "vector" entry; otherwise like the whole space.

     # Arguments
         venv: vectorized environment to wrap
         ob: when truthy, create running observation statistics
         ret: when truthy, create scalar running statistics in `ret_rms`
         clipob: stored as `self.clipob`
         cliprew: stored as `self.cliprew`
         gamma: stored as `self.gamma`
         epsilon: stored as `self.epsilon`
     """
     VecEnvWrapper.__init__(self, venv)

     has_dict_space = isinstance(self.observation_space, dict)
     if not ob:
         self.ob_rms = None
     elif has_dict_space:
         self.ob_rms = RunningMeanStd(
             shape=self.observation_space["vector"].shape)
     else:
         self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)

     if ret:
         self.ret_rms = RunningMeanStd(shape=())
     else:
         self.ret_rms = None

     self.clipob, self.cliprew = clipob, cliprew
     # One accumulator slot per environment.
     self.ret = np.zeros(self.num_envs)
     self.gamma, self.epsilon = gamma, epsilon
Example #12
0
    def __init__(self,
                 venv,
                 num_models,
                 model_dir,
                 include_action,
                 num_layers,
                 embedding_dims,
                 ctrl_coeff=0.,
                 alive_bonus=0.):
        """Wrap `venv` and restore an ensemble of models from checkpoints.

        A dedicated TensorFlow graph and a CPU-only session are created, and
        `num_models` instances of `preference_learning.Model` are built in
        that graph and restored from `model_dir`.

        # Arguments
            venv: vectorized environment to wrap
            num_models: number of models to build and restore
            model_dir: directory containing `model_<i>.ckpt` checkpoints
            include_action: forwarded as the first argument of `Model`
            num_layers: forwarded to `Model`
            embedding_dims: forwarded to `Model`
            ctrl_coeff: stored as `self.ctrl_coeff`
            alive_bonus: stored as `self.alive_bonus`
        """
        VecEnvWrapper.__init__(self, venv)

        self.ctrl_coeff = ctrl_coeff
        self.alive_bonus = alive_bonus

        # Private graph so the models do not share the default graph.
        self.graph = tf.Graph()

        config = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU
        #config.gpu_options.allow_growth = True
        self.sess = tf.Session(graph=self.graph, config=config)

        with self.graph.as_default():
            with self.sess.as_default():
                # Make the directory four levels above this file importable
                # before importing `preference_learning` from it.
                import os, sys
                dir_path = os.path.dirname(os.path.realpath(__file__))
                sys.path.append(os.path.join(dir_path, '..', '..', '..', '..'))
                from preference_learning import Model

                # Print the resolved checkpoint directory.
                print(os.path.realpath(model_dir))

                self.models = []
                for i in range(num_models):
                    # Each model is built under its own variable scope
                    # ('model_<i>') and restored from 'model_<i>.ckpt'.
                    with tf.variable_scope('model_%d' % i):
                        model = Model(include_action,
                                      self.venv.observation_space.shape[0],
                                      self.venv.action_space.shape[0],
                                      num_layers=num_layers,
                                      embedding_dims=embedding_dims)
                        model.saver.restore(self.sess,
                                            model_dir + '/model_%d.ckpt' % (i))
                    self.models.append(model)
Example #13
0
    def __init__(
        self,
        venv: Env,
        ob: bool = True,
        ret: bool = True,
        clipob: float = 10.0,
        cliprew: float = 10.0,
        gamma: float = 0.99,
        epsilon: float = 1e-8,
        first_n: int = None,
    ) -> None:
        """
        Modified init function of VecNormalize. The only change here is in modifying the
        shape of self.ob_rms. The argument ``first_n`` controls how much of the
        observation we want to normalize: for an observation ``obs``, we normalize the
        vector ``obs[:first_n]``.

        Arguments
        ---------
        venv : environment to wrap.
        ob : when truthy, create running observation statistics; otherwise
            ``self.ob_rms`` is None.
        ret : when truthy, create scalar running statistics in ``self.ret_rms``.
        clipob, cliprew, gamma, epsilon : stored on the instance unchanged.
        first_n : length of the statistics vector, or None to use the full
            observation shape.

        Raises
        ------
        NotImplementedError
            If ``first_n`` is given and the observation space is not
            1-dimensional.
        """

        VecEnvWrapper.__init__(self, venv)
        # Truthiness test: `ob` is a bool flag, so the previous
        # `ob is not None` check was true even for `ob=False` and the
        # statistics could never be switched off.
        if ob:
            if first_n is None:
                self.ob_rms = RunningMeanStd(
                    shape=self.observation_space.shape)
            else:
                # A prefix length only has a defined shape for flat
                # (1-D) observation spaces.
                if len(self.observation_space.shape) == 1:
                    self.ob_rms = RunningMeanStd(shape=(first_n, ))
                else:
                    raise NotImplementedError
        else:
            self.ob_rms = None
        self.ret_rms = RunningMeanStd(shape=()) if ret else None
        self.clipob = clipob
        self.cliprew = cliprew
        # One accumulator slot per environment.
        self.ret = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon
        self.first_n = first_n
 def __init__(self, venv):
     """Store `venv` and initialise the base wrapper with it.

     # Arguments
         venv: vectorized environment to wrap
     """
     # Set before the base initialiser runs; presumably redundant with what
     # VecEnvWrapper.__init__ does, but that is not visible from here.
     self.venv = venv
     VecEnvWrapper.__init__(self, venv)
Example #15
0
 def __init__(self, venv):
     """Wrap `venv` and clear the per-episode statistics holders.

     # Arguments
         venv: vectorized environment to wrap
     """
     VecEnvWrapper.__init__(self, venv)
     # Both holders start empty; presumably filled in on reset (not shown).
     self.episodic_rets = self.episodic_lens = None
Example #16
0
 def close(self):
     """Close the wrapper via the base class, then close the video recorder."""
     VecEnvWrapper.close(self)
     # Runs after the base close; skipped if the base close raises.
     self.close_video_recorder()
Example #17
0
 def __init__(self, env, *, reward_network):
     """Wrap `env` and keep a reference to a reward network.

     # Arguments
         env: vectorized environment to wrap
         reward_network: keyword-only; stored as `self.reward_network`
     """
     VecEnvWrapper.__init__(self, env)
     self.reward_network = reward_network
     # No previous observation is recorded at construction time.
     self.prev_obs = None
Example #18
0
 def __init__(self, venv, dim=6):
     """Wrap `venv` and record the size of its discrete action space.

     # Arguments
         venv: vectorized environment to wrap; its action space must
             expose `n`
         dim: NOTE(review): accepted but never read; `self.dim` is always
             taken from `self.action_space.n`. Confirm whether this
             parameter is still needed.
     """
     VecEnvWrapper.__init__(self, venv)
     self.dim = self.action_space.n
Example #19
0
 def __init__(self, venv, r_model):
     """Wrap `venv` and store a callable model.

     # Arguments
         venv: vectorized environment to wrap
         r_model: must be callable; stored as `self.r_model`
     """
     VecEnvWrapper.__init__(self, venv)
     # NOTE: assert statements are removed under `python -O`, so this check
     # is only a development-time guard.
     assert callable(r_model)
     self.r_model = r_model
 def __init__(self, venv):
     """Wrap `venv`; no state is added beyond the base wrapper's.

     # Arguments
         venv: vectorized environment to wrap
     """
     VecEnvWrapper.__init__(self, venv)
Example #21
0
 def close(self):
     """Close the wrapper via the base class, then close the video recorder."""
     VecEnvWrapper.close(self)
     # Runs after the base close; skipped if the base close raises.
     self.close_video_recorder()