Example #1
    def __init__(self, venv, directory, record_video_trigger, video_length=200):
        """
        # Arguments
            venv: VecEnv to wrap
            directory: Where to save videos
            record_video_trigger:
                Function that defines when to start recording.
                The function takes the current step number
                and returns whether we should start recording or not.
            video_length: Length of recorded video
        """

        VecEnvWrapper.__init__(self, venv)
        self.record_video_trigger = record_video_trigger
        self.video_recorder = None

        self.directory = os.path.abspath(directory)
        if not os.path.exists(self.directory): os.mkdir(self.directory)

        self.file_prefix = "vecenv"
        self.file_infix = '{}'.format(os.getpid())
        self.step_id = 0
        self.video_length = video_length

        self.recording = False
        self.recorded_frames = 0
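The record_video_trigger argument above is just a predicate over the global step counter. A minimal sketch of one way to build such a trigger (the helper name record_every is made up; the recorder class itself is not shown in the snippet):

# Hypothetical helper: start a recording every `interval` steps.
def record_every(interval):
    return lambda step: step % interval == 0

trigger = record_every(10000)
print(trigger(0), trigger(500), trigger(20000))   # True False True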
Example #2
    def __init__(self, env_fns, nstack, spaces=None):
        import baselines.common.vec_env.subproc_vec_env as VecEnv

        def worker(remote, parent_remote, env_fn_wrapper):
            parent_remote.close()
            env = env_fn_wrapper.x()
            while True:
                cmd, data = remote.recv()
                if cmd == 'step':
                    ob, reward, done, info = env.step(data)
                    if done:
                        ob = env.reset()
                    remote.send((ob, reward, done, info))
                elif cmd == 'reset':
                    ob = env.reset()
                    remote.send(ob)
                elif cmd == 'render':
                    remote.send(env.render(mode='rgb_array'))
                elif cmd == 'close':
                    remote.close()
                    break
                elif cmd == 'get_spaces':
                    remote.send((env.observation_space, env.action_space))
                elif cmd == 'is_human':
                    remote.send(env.unwrapped.is_human)
                elif cmd == 'get_game_state':
                    remote.send(env.unwrapped.game_state)
                else:
                    raise NotImplementedError

        VecEnv.worker = worker
        venv = VecEnv.SubprocVecEnv(env_fns, spaces)
        venv = VecFrameStack(venv, nstack)
        VecEnvWrapper.__init__(self, venv)
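The patched worker adds two custom commands, 'is_human' and 'get_game_state', on top of the usual step/reset/render/close protocol. As a sketch of how the parent process could issue one of them, assuming baselines' SubprocVecEnv keeps its parent pipe ends in a remotes attribute and that wrapper names an instance of the class above (both assumptions, not shown in the snippet):

# Hypothetical usage sketch: query a custom command on the first worker.
# The SubprocVecEnv sits under the VecFrameStack, hence the double .venv.
remote = wrapper.venv.venv.remotes[0]
remote.send(('get_game_state', None))
game_state = remote.recv()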
Example #3
 def __init__(self,
              venv,
              ob=True,
              ret=True,
              clipob=10.,
              cliprew=10.,
              gamma=0.99,
              epsilon=1e-8,
              use_tf=False):
     VecEnvWrapper.__init__(self, venv)
     self.num_agent = venv.num_agent  # set here so it exists before the rms lists below use it
     if use_tf:
         from baselines.common.running_mean_std import TfRunningMeanStd
         self.ob_rms = [
             TfRunningMeanStd(shape=self.observation_space[i].shape,
                              scope='ob_rms') for i in range(self.num_agent)
         ] if ob else None
         self.ret_rms = [
             TfRunningMeanStd(shape=(), scope='ret_rms')
             for _ in range(self.num_agent)
         ] if ret else None
     else:
         from baselines.common.running_mean_std import RunningMeanStd
         self.ob_rms = [
             RunningMeanStd(shape=self.observation_space[i].shape)
             for i in range(self.num_agent)
         ] if ob else None
         self.ret_rms = [
             RunningMeanStd(shape=()) for _ in range(self.num_agent)
         ] if ret else None
     self.clipob = clipob
     self.cliprew = cliprew
     self.ret = [np.zeros(self.num_envs) for _ in range(self.num_agent)]
     self.gamma = gamma
     self.epsilon = epsilon
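For reference, the per-agent running statistics built above are used the same way as in the single-agent VecNormalize: subtract the running mean, divide by the running standard deviation, and clip. A self-contained numpy sketch of that arithmetic (the helper name is made up; the wrapper's own step_wait is not part of the snippet):

import numpy as np

def normalize_obs(obs, mean, var, clipob=10., epsilon=1e-8):
    # Standard VecNormalize-style observation filter.
    return np.clip((obs - mean) / np.sqrt(var + epsilon), -clipob, clipob)

obs = np.array([[0.5, -2.0, 3.0]])   # one agent's observation batch
print(normalize_obs(obs, mean=np.array([1.0, 0.0, -1.0]), var=np.array([4.0, 1.0, 1.0])))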
Example #4
    def __init__(self, venv, num_models, model_dir):
        VecEnvWrapper.__init__(self, venv)

        self.graph = tf.Graph()

        config = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU
        #config.gpu_options.allow_growth = True
        self.sess = tf.Session(graph=self.graph, config=config)

        with self.graph.as_default():
            with self.sess.as_default():
                import os, sys
                dir_path = os.path.dirname(os.path.realpath(__file__))
                sys.path.append(os.path.join(dir_path, '..', '..', '..', '..'))
                from preference_learning import Model

                print(os.path.realpath(model_dir))

                self.models = []
                for i in range(num_models):
                    with tf.variable_scope('model_%d' % i):
                        model = Model(self.venv.observation_space.shape[0])
                        model.saver.restore(self.sess,
                                            model_dir + '/model_%d.ckpt' % (i))
                    self.models.append(model)
        """
Example #5
    def __init__(self, venv, pretrained_reward_net_path, chain_path,
                 embedding_dim, env_name):
        VecEnvWrapper.__init__(self, venv)
        self.reward_net = EmbeddingNet(embedding_dim)
        #load the pretrained weights
        self.reward_net.load_state_dict(torch.load(pretrained_reward_net_path))
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        #load the mean of the MCMC chain
        burn = 5000
        skip = 20
        reader = open(chain_path)
        data = []
        for line in reader:
            parsed = line.strip().split(',')
            np_line = []
            for s in parsed[:-1]:
                np_line.append(float(s))
            data.append(np_line)
        data = np.array(data)
        #print(data[burn::skip,:].shape)

        #get average across chain and use it as the last layer in the network
        mean_weight = np.mean(data[burn::skip, :], axis=0)
        #print("mean weights", mean_weight[:-1])
        #print("mean bias", mean_weight[-1])
        #print(mean_weight.shape)
        self.reward_net.fc2 = nn.Linear(
            embedding_dim, 1, bias=False
        )  #last layer just outputs the scalar reward = w^T \phi(s)

        new_linear = torch.from_numpy(mean_weight)
        print("new linear", new_linear)
        print(new_linear.size())
        with torch.no_grad():
            #unsqueeze since nn.Linear wants a 2-d tensor for weights
            new_linear = new_linear.unsqueeze(0)
            #print("new linear", new_linear)
            #print("new bias", new_bias)
            with torch.no_grad():
                #print(last_layer.weight)
                #print(last_layer.bias)
                #print(last_layer.weight.data)
                #print(last_layer.bias.data)
                self.reward_net.fc2.weight.data = new_linear.float().to(
                    self.device)

            #TODO: print out last layer to make sure it stuck...
            print("USING MEAN WEIGHTS FROM MCMC")
            #with torch.no_grad():
            #    for param in self.reward_net.fc2.parameters():
            #        print(param)

        self.reward_net.to(self.device)

        self.rew_rms = RunningMeanStd(shape=())
        self.epsilon = 1e-8
        self.cliprew = 10.
        self.env_name = env_name
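The comment above says the last layer "just outputs the scalar reward = w^T \phi(s)"; a small, self-contained sketch showing that a bias-free nn.Linear loaded this way reproduces exactly that dot product (the weights and embedding values below are made up, and EmbeddingNet itself is not reproduced):

import numpy as np
import torch
import torch.nn as nn

embedding_dim = 4
mean_weight = np.array([0.1, -0.2, 0.3, 0.05])   # stand-in for the MCMC chain mean

fc = nn.Linear(embedding_dim, 1, bias=False)
with torch.no_grad():
    fc.weight.data = torch.from_numpy(mean_weight).float().unsqueeze(0)

phi = torch.tensor([[1.0, 2.0, 3.0, 4.0]])       # stand-in for the embedding phi(s)
print(fc(phi).item())                             # ~0.8
print(float(np.dot(mean_weight, phi.numpy().ravel())))   # the same w^T phi(s)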
Example #6
    def __init__(self,
                 venv,
                 directory,
                 video_name,
                 record_video_trigger,
                 video_length=200):
        """
        # Arguments
            venv: VecEnv to wrap
            directory: Where to save videos
            record_video_trigger:
                Function that defines when to start recording.
                The function takes the current step number
                and returns whether we should start recording or not.
            video_length: Length of recorded video
        """
        #print("initializing vecvideorecordernamed")
        #print(directory)
        #print(video_name)
        VecEnvWrapper.__init__(self, venv)
        self.record_video_trigger = record_video_trigger
        self.video_recorder = None

        self.directory = os.path.abspath(directory)
        if not os.path.exists(self.directory): os.mkdir(self.directory)
        self.video_name = video_name

        self.file_prefix = "vecenv"
        self.file_infix = '{}'.format(os.getpid())
        self.step_id = 0
        self.video_length = video_length

        self.recording = False
        self.recorded_frames = 0
Example #7
    def __init__(self,
                 venv,
                 ob=True,
                 ret=True,
                 clipob=10.,
                 cliprew=10.,
                 gamma=0.99,
                 epsilon=1e-8):
        VecEnvWrapper.__init__(self, venv)
        try:
            self.num_agents = num_agents = len(self.observation_space)
            self.ob_rms = [
                RunningMeanStd(shape=self.observation_space[k].shape)
                for k in range(num_agents)
            ] if ob else None
        except Exception:  # observation_space has no len(); fall back to .spaces (gym Tuple)
            self.num_agents = num_agents = len(self.observation_space.spaces)
            self.ob_rms = [
                RunningMeanStd(shape=self.observation_space.spaces[k].shape)
                for k in range(num_agents)
            ] if ob else None

        self.ret_rms = RunningMeanStd(shape=()) if ret else None
        #[RunningMeanStd(shape=()) for k in range(num_agents)] if ret else None
        self.clipob = clipob
        self.cliprew = cliprew
        # self.ret = [np.zeros(self.num_envs) for _ in range(num_agents)]
        self.ret = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon
Example #8
 def __init__(self,
              venv,
              norm_obs=True,
              norm_reward=True,
              clip_obs=10.,
              clip_reward=10.,
              gamma=0.99,
              epsilon=1e-8):
     """
     A rolling average, normalizing, vectorized wrapper for the environment base class
     
     :param venv: ([Gym Environment]) the list of environments to vectorize and normalize
     :param norm_obs: (bool) normalize observation
     :param norm_reward: (bool) normalize reward with discounting (r = sum(r_old) * gamma + r_new)
     :param clip_obs: (float) clipping value for normalizing observation
     :param clip_reward: (float) clipping value for normalizing reward
     :param gamma: (float) discount factor
     :param epsilon: (float) epsilon value to avoid arithmetic issues
     """
     VecEnvWrapper.__init__(self, venv)
     self.ob_rms = RunningMeanStd(
         shape=self.observation_space.shape) if norm_obs else None
     self.ret_rms = RunningMeanStd(shape=()) if norm_reward else None
     self.clip_obs = clip_obs
     self.clip_reward = clip_reward
     self.ret = np.zeros(self.num_envs)
     self.gamma = gamma
     self.epsilon = epsilon
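The reward half of the docstring ("r = sum(r_old) * gamma + r_new") refers to a per-environment discounted return that the ret_rms statistics track; rewards are then divided by that return's running standard deviation and clipped. A minimal numpy sketch of a single update step, with the running variance stubbed out as a constant:

import numpy as np

gamma, epsilon, clip_reward = 0.99, 1e-8, 10.0
ret = np.zeros(4)                        # one discounted return per env (self.ret)
ret_var = 1.0                            # stands in for self.ret_rms.var

rews = np.array([1.0, 0.5, -0.2, 2.0])   # rewards from the current step
ret = ret * gamma + rews                 # the 'sum(r_old) * gamma + r_new' recurrence
norm_rews = np.clip(rews / np.sqrt(ret_var + epsilon), -clip_reward, clip_reward)
print(ret, norm_rews)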
Example #9
 def __init__(self, venv, render=False):
     # self.reward_range = (-float('inf'), float('inf'))
     VecEnvWrapper.__init__(self, venv)
     h, w, c = venv.observation_space.shape
     self.observation_space = gym.spaces.Box(0, 255, shape=[h, w, c + 1])
     # init Goal manager
     self.gms = [Goal(i) for i in range(self.num_envs)]
     self.m_Render = render
Example #10
 def __init__(self, venv, nstack):
     self.venv = venv
     self.nstack = nstack
     wos = venv.observation_space # wrapped ob space
     low = np.repeat(wos.low, self.nstack, axis=-1)
     high = np.repeat(wos.high, self.nstack, axis=-1)
     self.stackedobs = np.zeros((venv.num_envs,)+low.shape, low.dtype)
     observation_space = spaces.Box(low=low, high=high, dtype=venv.observation_space.dtype)
     VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
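What the stackedobs buffer set up above is for: each step, the channel axis is shifted by one frame and the newest observation is written into the last slot. This mirrors how baselines' VecFrameStack consumes it; a standalone numpy sketch with made-up shapes:

import numpy as np

num_envs, h, w, c, nstack = 2, 3, 3, 1, 4
stackedobs = np.zeros((num_envs, h, w, c * nstack), dtype=np.uint8)

new_obs = np.full((num_envs, h, w, c), 7, dtype=np.uint8)   # pretend frame from step()
stackedobs = np.roll(stackedobs, shift=-c, axis=-1)          # shift frames left; oldest wraps to the end
stackedobs[..., -c:] = new_obs                               # overwrite it with the newest frame
print(stackedobs[0, 0, 0])                                   # [0 0 0 7] after the first step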
Example #11
 def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
     VecEnvWrapper.__init__(self, venv)
     self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if ob else None
     self.ret_rms = None
     self.clipob = clipob
     self.cliprew = cliprew
     self.ret = np.zeros(self.num_envs)
     self.gamma = gamma
     self.epsilon = epsilon
Example #12
 def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
     VecEnvWrapper.__init__(self, venv)
     self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if ob else None
     self.ret_rms = RunningMeanStd(shape=()) if ret else None
     self.clipob = clipob
     self.cliprew = cliprew
     self.ret = np.zeros(self.num_envs)
     self.gamma = gamma
     self.epsilon = epsilon
Example #13
 def __init__(self, env, *, encoder):
     VecEnvWrapper.__init__(self, env)
     self.encoder = encoder
     self.observation_space = spaces.Box(
         shape=(self.encoder.d_embedding,),
         low=np.finfo(np.float32).min,
         high=np.finfo(np.float32).max
     )
     print("Wrapping with encoder")
Example #14
 def __init__(self, venv, nstack):
     self.venv = venv
     self.nstack = nstack
     wos = venv.observation_space # wrapped ob space
     low = np.repeat(wos.low, self.nstack, axis=-1)
     high = np.repeat(wos.high, self.nstack, axis=-1)
     self.stackedobs = np.zeros((venv.num_envs,)+low.shape, low.dtype)
     observation_space = spaces.Box(low=low, high=high, dtype=venv.observation_space.dtype)
     VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Example #15
 def __init__(self, venv, filename=None, keep_buf=0, info_keywords=()):
     VecEnvWrapper.__init__(self, venv)
     self.eprets = None
     self.eplens = None
     self.epcount = 0
     self.tstart = time.time()
     self.info_keywords = info_keywords
     self.keep_buf = keep_buf
     self.num_agent = venv.num_agent
     if self.keep_buf:
         self.epret_buf = deque([], maxlen=keep_buf)
         self.eplen_buf = deque([], maxlen=keep_buf)
Example #16
    def __init__(self, venv, reward_net_path, combo_param):
        VecEnvWrapper.__init__(self, venv)
        self.reward_net = AtariNet()
        self.reward_net.load_state_dict(torch.load(reward_net_path))
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.reward_net.to(self.device)

        self.lamda = combo_param  # how much weight to give to IRL versus RL: combo_param in [0, 1], with 0 being pure RL and 1 being pure IRL
        self.rew_rms = RunningMeanStd(shape=())
        self.epsilon = 1e-8
        self.cliprew = 10.
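The lamda weight documented above is a convex combination of the learned (IRL) reward and the environment (RL) reward. The wrapper's step_wait is not shown in the snippet; as an illustration of the blend itself, in numpy with made-up values:

import numpy as np

lamda = 0.7                              # 0 = pure RL reward, 1 = pure IRL reward
env_rews = np.array([1.0, 0.0, -1.0])    # rewards returned by the wrapped env
irl_rews = np.array([0.2, 0.4, 0.1])     # rewards predicted by self.reward_net
combined = lamda * irl_rews + (1.0 - lamda) * env_rews
print(combined)                          # approximately [0.44 0.28 -0.23]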
Example #17
 def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8, reward_scale=1., update=True):
     VecEnvWrapper.__init__(self, venv)
     self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if ob else None
     self.ret_rms = RunningMeanStd(shape=()) if ret else None
     self.clipob = clipob
     self.cliprew = cliprew
     self.ret = np.zeros(self.num_envs)
     self.gamma = gamma
     self.epsilon = epsilon
     self.variables_name_save = ['clipob', 'cliprew', 'ret', 'gamma', 'epsilon']
     self.reward_scale = reward_scale
     self.update = update
Example #18
    def __init__(self, venv, reward_net_path, env_name):
        VecEnvWrapper.__init__(self, venv)
        self.reward_net = AtariNet()
        self.reward_net.load_state_dict(torch.load(reward_net_path))
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.reward_net.to(self.device)

        self.rew_rms = RunningMeanStd(shape=())
        self.epsilon = 1e-8
        self.cliprew = 10.
        self.env_name = env_name
Example #19
 def __init__(self, venv, nstack, device):
     self.venv = venv
     self.nstack = nstack
     wos = venv.observation_space  # wrapped ob space
     self.shape_dim0 = wos.low.shape[0]
     low = np.repeat(wos.low, self.nstack, axis=0)
     high = np.repeat(wos.high, self.nstack, axis=0)
     self.stackedobs = np.zeros((venv.num_envs, ) + low.shape)
     self.stackedobs = torch.from_numpy(self.stackedobs).float()
     self.stackedobs = self.stackedobs.to(device)
     observation_space = gym.spaces.Box(low=low,
                                        high=high,
                                        dtype=venv.observation_space.dtype)
     VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Example #20
    def __init__(self, venv, nstack, device=None):
        self.venv = venv
        self.nstack = nstack

        wos = venv.observation_space  # wrapped ob space
        self.shape_dim0 = wos.shape[0]

        low = np.repeat(wos.low, self.nstack, axis=0)
        high = np.repeat(wos.high, self.nstack, axis=0)

        if device is None:
            device = torch.device("cpu")
        self.stacked_obs = torch.zeros((venv.num_envs,) + low.shape).to(device)

        observation_space = gym.spaces.Box(low=low, high=high, dtype=venv.observation_space.dtype)
        VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Example #21
    def __init__(self, venv):
        VecEnvWrapper.__init__(self, venv)

        self.graph = tf.Graph()

        config = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU
        #config.gpu_options.allow_growth = True
        self.sess = tf.Session(graph=self.graph, config=config)

        with self.graph.as_default():
            with self.sess.as_default():
                self.obs = tf.placeholder(tf.float32, [None, 84, 84, 4])

                self.rewards = tf.reduce_mean(
                    tf.random_normal(tf.shape(self.obs)), axis=[1, 2, 3])
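A self-contained sketch of what that small graph does: feeding a batch of stacked frames into the placeholder yields one random "reward" per environment (TF 1.x API, matching the snippet; whether step_wait actually feeds observations this way is an assumption, since it is not shown):

import numpy as np
import tensorflow as tf   # TF 1.x, as in the snippet above

graph = tf.Graph()
with graph.as_default():
    obs_ph = tf.placeholder(tf.float32, [None, 84, 84, 4])
    rewards = tf.reduce_mean(tf.random_normal(tf.shape(obs_ph)), axis=[1, 2, 3])

with tf.Session(graph=graph) as sess:
    batch = np.zeros((3, 84, 84, 4), dtype=np.float32)     # pretend stacked frames
    print(sess.run(rewards, feed_dict={obs_ph: batch}))    # three random scalars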
Example #22
File: envs.py Project: zyr17/RL
    def __init__(self, venv, nstack):
        self.venv = venv
        self.nstack = nstack

        wos = venv.observation_space  # wrapped ob space
        self.shape_dim0 = wos.shape[0]

        low = np.repeat(wos.low, self.nstack, axis=0)
        high = np.repeat(wos.high, self.nstack, axis=0)

        self.stacked_obs = cuda(torch.zeros((venv.num_envs, ) + low.shape))

        observation_space = gym.spaces.Box(low=low,
                                           high=high,
                                           dtype=venv.observation_space.dtype)
        VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Example #23
	def __init__(self, venv, filename=None, keep_buf=0, info_keywords=()):
		VecEnvWrapper.__init__(self, venv)
		self.eprets = np.zeros(self.num_envs, 'f')
		self.eplens = np.zeros(self.num_envs, 'i')
		self.epcount = 0
		self.tstart = time.time()
		if filename:
			self.results_writer = ResultsWriter(filename, header={'t_start': self.tstart},
				extra_keys=info_keywords)
		else:
			self.results_writer = None
		self.info_keywords = info_keywords
		self.keep_buf = keep_buf
		if self.keep_buf:
			self.epret_buf = deque([], maxlen=keep_buf)
			self.eplen_buf = deque([], maxlen=keep_buf)
Example #24
    def __init__(self, venv, model_dir, ctrl_coeff=0., alive_bonus=0.):
        VecEnvWrapper.__init__(self, venv)

        ob_shape = venv.observation_space.shape
        ac_dims = (venv.action_space.n if venv.action_space.dtype == int
                   else venv.action_space.shape[-1])

        self.ctrl_coeff = ctrl_coeff
        self.alive_bonus = alive_bonus

        self.graph = tf.Graph()

        config = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU
        #config.gpu_options.allow_growth = True
        self.sess = tf.Session(graph=self.graph, config=config)

        with self.graph.as_default():
            with self.sess.as_default():
                import os, sys
                from argparse import Namespace
                from pathlib import Path

                dir_path = os.path.dirname(os.path.realpath(__file__))
                sys.path.append(os.path.join(dir_path, '..', '..', '..', '..'))
                from utils import Model, RewardNet

                print(os.path.realpath(model_dir))
                with open(str(Path(model_dir) / 'args.txt')) as f:
                    args = eval(f.read())

                models = []
                for i in range(args.num_models):
                    with tf.variable_scope('model_%d' % i):
                        net = RewardNet(args.include_action,
                                        ob_shape[-1],
                                        ac_dims,
                                        num_layers=args.num_layers,
                                        embedding_dims=args.embedding_dims)

                        model = Model(net, batch_size=1)
                        model.saver.restore(
                            self.sess,
                            os.path.join(model_dir, 'model_%d.ckpt' % i))

                        models.append(model)
                self.models = models
Example #25
 def __init__(self, venv, n_stack):
     """
     Frame stacking wrapper for a vectorized environment

     :param venv: ([Gym Environment]) the list of environments to vectorize
     :param n_stack: (int) number of frames to stack
     """
     self.venv = venv
     self.n_stack = n_stack
     wrapped_obs_space = venv.observation_space
     low = np.repeat(wrapped_obs_space.low, self.n_stack, axis=-1)
     high = np.repeat(wrapped_obs_space.high, self.n_stack, axis=-1)
     self.stackedobs = np.zeros((venv.num_envs, ) + low.shape, low.dtype)
     observation_space = spaces.Box(low=low,
                                    high=high,
                                    dtype=venv.observation_space.dtype)
     VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Example #26
    def __init__(self,
                 venv,
                 ob_rms,
                 ret_rms,
                 clipob=10.,
                 cliprew=10.,
                 gamma=0.99,
                 epsilon=1e-8):
        VecEnvWrapper.__init__(self, venv)
        self.ob_rms = self.RMS(*ob_rms)
        self.ret_rms = self.RMS(*ret_rms)
        self.clipob = clipob
        self.cliprew = cliprew
        self.ret = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon

        assert self.ob_rms.mean.shape == self.ob_rms.var.shape == self.observation_space.shape
Example #27
    def __init__(self, venv, priors, device=None):
        self.venv = venv
        self.nstack = venv.nstack
        self.addit_ch = 3
        self.priors = priors
        wos = venv.observation_space  # wrapped ob space
        w_shape = wos.shape
        low = np.zeros((w_shape[0] + self.addit_ch, w_shape[1], w_shape[2]))
        high = np.full((w_shape[0] + self.addit_ch, w_shape[1], w_shape[2]),
                       255)

        if device is None:
            device = torch.device('cpu')
        self.device = device  # default to CPU when no device is given

        self.full_obs = torch.zeros((venv.num_envs, ) + low.shape).to(device)
        observation_space = gym.spaces.Box(low=low,
                                           high=high,
                                           dtype=venv.observation_space.dtype)
        VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Example #28
 def __init__(self, venv, nstack):
     self.venv = venv
     self.nstack = nstack
     self.dict_obs = False
     if isinstance(venv.observation_space, gym.spaces.Dict):
         wos = venv.observation_space.spaces['observation']
         self.dict_obs = True
     else:
         wos = venv.observation_space  # wrapped ob space
     low = np.repeat(wos.low, self.nstack, axis=-1)
     high = np.repeat(wos.high, self.nstack, axis=-1)
     self.stackedobs = np.zeros((venv.num_envs, ) + low.shape, low.dtype)
     observation_space = spaces.Box(low=low,
                                    high=high,
                                    dtype=venv.observation_space.dtype)
     if self.dict_obs:
         _observation_space = venv.observation_space
         _observation_space.spaces['observation'] = observation_space
         observation_space = _observation_space
     VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Example #29
    def __init__(self, venv, nstack, device=None):
        self.venv   = venv
        self.nstack = nstack
        wrapped_ob_space = venv.observation_space # should be 1 x 84 x 84
        self.shape_dim0  = wrapped_ob_space.shape[0] # shape_dim0 is 1

        # wrapped_ob_space.low is ZERO matrix of size 1 x 84 x 84, we make it 4 x 84 x 84 now
        # wrapped_ob_space.high is 255-matrix of size 1 x 84 x 84, we make it 4 x 84 x 84 now
        low  = np.repeat(wrapped_ob_space.low,  self.nstack, axis=0) 
        high = np.repeat(wrapped_ob_space.high, self.nstack, axis=0) 

        if device is None:
            device = torch.device('cpu')
        new_shape_tuple = (venv.num_envs, ) + low.shape # num_processes x 4 x 84 x 84 
        self.stacked_obs = torch.zeros(new_shape_tuple).to(device)

        observation_space = gym.spaces.Box( 
            low=low, high=high, dtype=venv.observation_space.dtype)

        VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Example #30
    def __init__(
        self,
        venv: Env,
        ob: bool = True,
        ret: bool = True,
        clipob: float = 10.0,
        cliprew: float = 10.0,
        gamma: float = 0.99,
        epsilon: float = 1e-8,
        first_n: int = None,
    ) -> None:
        """
        Modified init function of VecNormalize. The only change here is in modifying the
        shape of self.ob_rms. The argument ``first_n`` controls how much of the
        observation we want to normalize: for an observation ``obs``, we normalize the
        vector ``obs[:first_n]``.
        """

        VecEnvWrapper.__init__(self, venv)
        if ob:
            if first_n is None:
                self.ob_rms = RunningMeanStd(
                    shape=self.observation_space.shape)
            else:
                if len(self.observation_space.shape) == 1:
                    self.ob_rms = RunningMeanStd(shape=(first_n, ))
                else:
                    raise NotImplementedError
        else:
            self.ob_rms = None
        self.ret_rms = RunningMeanStd(shape=()) if ret else None
        self.clipob = clipob
        self.cliprew = cliprew
        self.ret = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon
        self.first_n = first_n
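A small numpy sketch of the partial normalization described in the docstring: only the first first_n entries of each observation go through the running statistics, and the tail passes through untouched (the helper name is made up, and the wrapper's own observation filter is not shown in the snippet):

import numpy as np

def normalize_first_n(obs, mean, var, first_n, clipob=10.0, epsilon=1e-8):
    # Normalize obs[:, :first_n] with running stats; leave the rest raw.
    out = obs.copy()
    head = (obs[:, :first_n] - mean) / np.sqrt(var + epsilon)
    out[:, :first_n] = np.clip(head, -clipob, clipob)
    return out

obs = np.array([[2.0, -1.0, 100.0, 200.0]])        # last two entries stay raw
print(normalize_first_n(obs, mean=np.array([1.0, 1.0]), var=np.array([4.0, 4.0]), first_n=2))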
Example #31
 def __init__(self, venv):
     self.venv = venv
     VecEnvWrapper.__init__(self, venv)
Example #32
 def __init__(self, venv):
     VecEnvWrapper.__init__(self, venv)
     self.episodic_rets = None
     self.episodic_lens = None
Example #33
 def __init__(self, env, *, reward_network):
     VecEnvWrapper.__init__(self, env)
     self.reward_network = reward_network
     self.prev_obs = None