Example #1
    def __init__(self, venv, desiredShape):
        self.vecenv = venv
        self.desiredShape = desiredShape
        (self.y, self.x, self.c) = desiredShape
        self.b = len(self.vecenv.remotes)

        # Manually get the dtype
        # self.vecenv.remotes[0].send(('get_spaces', None))
        # obsSpace, _ = self.vecenv.remotes[0].recv()
        # dtype = obsSpace.dtype
        # print(dtype)
        # print(np.iinfo(dtype).max)
        # print(np.iinfo(dtype).min)

        # Create the new shapes for actions and observations
        observation_space = gym.spaces.Box(low=0,
                                           high=255,
                                           shape=desiredShape,
                                           dtype=np.uint8)
        actionSpace = gym.spaces.Discrete(6)  # CHANGED AWAY FROM 6 FOR THE EXPERIMENTS

        VecEnvWrapper.__init__(self,
                               venv,
                               observation_space=observation_space,
                               action_space=actionSpace)
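
Only the constructor is shown above; the reshaping itself would have to happen on the observation path. A minimal, hypothetical companion step_wait (the reshape call is an assumption, since the original resize logic is not shown; np is already in scope):

    def step_wait(self):
        obs, rewards, dones, infos = self.venv.step_wait()
        # Hypothetical: reinterpret each observation batch as (y, x, c).
        obs = np.asarray(obs).reshape((self.b, self.y, self.x, self.c))
        return obs, rewards, dones, infos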
Example #2
 def __init__(self, venv, agent_idx):
     """ Got game results.
     :param: venv: environment.
     :param: agent_idx: the index of victim agent.
     """
     VecEnvWrapper.__init__(self, venv)
     self.outcomes = []
     self.num_games = 0
     self.agent_idx = agent_idx
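
Only the constructor is shown; a plausible step_wait that fills self.outcomes from the per-env info dicts might look like this (the 'winner' key is an assumption, not confirmed by the original):

 def step_wait(self):
     obs, rewards, dones, infos = self.venv.step_wait()
     for done, info in zip(dones, infos):
         if done:
             # Assumed convention: the env reports the game result in info.
             self.outcomes.append(info.get('winner'))
             self.num_games += 1
     return obs, rewards, dones, infos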
Example #3
 def __init__(self, venv, n_stack):
     self.venv = venv
     self.n_stack = n_stack
     wrapped_obs_space = venv.observation_space
     low = np.repeat(wrapped_obs_space.low, self.n_stack, axis=-1)
     high = np.repeat(wrapped_obs_space.high, self.n_stack, axis=-1)
     self.stackedobs = np.zeros((venv.num_envs,) + low.shape, low.dtype)
     observation_space = spaces.Box(low=low, high=high, dtype=venv.observation_space.dtype)
     VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
     venv.frame_stack_pointer = self  # back-reference so other code can reach this wrapper
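
This constructor matches Stable Baselines' VecFrameStack; for context, its step_wait rolls the stacked buffer and writes the newest frames into the trailing channels (a sketch of that implementation):

 def step_wait(self):
     observations, rewards, dones, infos = self.venv.step_wait()
     last_ax_size = observations.shape[-1]
     # Shift old frames toward the front of the channel axis.
     self.stackedobs = np.roll(self.stackedobs, shift=-last_ax_size, axis=-1)
     for i, done in enumerate(dones):
         if done:
             self.stackedobs[i] = 0  # clear the stack at episode boundaries
     self.stackedobs[..., -last_ax_size:] = observations
     return self.stackedobs, rewards, dones, infos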
Example #4
 def __init__(self, venv, logdir, info_keywords=(), **kwargs):
     """
     A monitor wrapper for Gym environments, it is used to know the episode reward, length, time and other data.
     :param env: (Gym environment) The environment
     :param filename: (str) the location to save tensorboard logs
     :param info_keywords: (tuple) extra information to log, from the information return of environment.step
     """
     VecEnvWrapper.__init__(self, venv=venv, **kwargs)
     self.writer = FileWriter(logdir)
     self.info_keywords = info_keywords
     self.episode_infos = [dict() for _ in range(self.venv.num_envs)]
     self.total_steps = 0
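
Only construction is shown; episode statistics would be written through self.writer elsewhere in the class. A sketch of a scalar-logging helper, assuming FileWriter is TensorFlow 1.x's tf.summary.FileWriter:

 import tensorflow as tf

 def _write_scalar(self, tag, value, step):
     # Assumes TF 1.x summary objects, consistent with FileWriter above.
     summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
     self.writer.add_summary(summary, step)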
Example #5
 def __init__(self,
              venv,
              raise_exception=False,
              warn_once=True,
              check_inf=True):
     VecEnvWrapper.__init__(self, venv)
     self.raise_exception = raise_exception
     self.warn_once = warn_once
     self.check_inf = check_inf
     self._actions = None
     self._observations = None
     self._user_warned = False
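
This has the shape of Stable Baselines' VecCheckNan: the flags configure a validity check run on actions and observations. A simplified sketch of such a check (the helper name is illustrative):

 import warnings
 import numpy as np

 def _check_val(self, name, value):
     has_nan = np.any(np.isnan(value))
     has_inf = self.check_inf and np.any(np.isinf(value))
     if has_nan or has_inf:
         msg = "invalid value ({}) found in {}".format(
             "inf" if has_inf else "nan", name)
         if self.raise_exception:
             raise ValueError(msg)
         if not (self.warn_once and self._user_warned):
             self._user_warned = True
             warnings.warn(msg)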
Example #6
    def set_venv(self, venv):
        """
        Sets the vectorized environment being wrapped to `venv`.

        Also sets attributes derived from it, such as `num_envs`.

        :param venv: (VecEnv)
        """
        if self.venv is not None:
            raise ValueError("Trying to set venv of already initialized VecNormalize wrapper.")
        VecEnvWrapper.__init__(self, venv)
        if self.obs_rms.mean.shape != self.observation_space.shape:
            raise ValueError("venv is incompatible with current statistics.")
        self.ret = np.zeros(self.num_envs)
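
A typical use of set_venv is re-attaching a freshly built environment to a restored VecNormalize, since the venv reference is not serialized (a sketch; the pickle filename is illustrative):

    import pickle

    # new_venv: a freshly constructed VecEnv matching the saved statistics
    with open("vec_normalize.pkl", "rb") as file_handler:
        vec_normalize = pickle.load(file_handler)  # restored with venv unset
    vec_normalize.set_venv(new_venv)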
Example #7
 def __init__(self, venv, training=True, norm_obs=True, norm_reward=True,
              clip_obs=10., clip_reward=10., gamma=0.99, epsilon=1e-8):
     VecEnvWrapper.__init__(self, venv)
     self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
     self.ret_rms = RunningMeanStd(shape=())
     self.clip_obs = clip_obs
     self.clip_reward = clip_reward
     # Returns: discounted rewards
     self.ret = np.zeros(self.num_envs)
     self.gamma = gamma
     self.epsilon = epsilon
     self.training = training
     self.norm_obs = norm_obs
     self.norm_reward = norm_reward
     self.old_obs = np.array([])
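
For context, the fields set up here are consumed each step roughly as follows (a condensed sketch of Stable Baselines' VecNormalize.step_wait):

 def step_wait(self):
     obs, rews, dones, infos = self.venv.step_wait()
     self.ret = self.ret * self.gamma + rews  # running discounted return
     self.old_obs = obs
     if self.training:
         self.obs_rms.update(obs)
         self.ret_rms.update(self.ret)
     if self.norm_obs:
         obs = np.clip((obs - self.obs_rms.mean)
                       / np.sqrt(self.obs_rms.var + self.epsilon),
                       -self.clip_obs, self.clip_obs)
     if self.norm_reward:
         rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon),
                        -self.clip_reward, self.clip_reward)
     self.ret[dones] = 0.0  # reset returns where episodes ended
     return obs, rews, dones, infos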
Example #8
    def __init__(self, env, output_directory, record_obs=False, format: str = "gif", unvec=False, reduction=12):
        VecEnvWrapper.__init__(self, env)
        prefix = "obs_" if record_obs else ""
        self.recorders = []
        self.reduction = reduction
        self.last = reduction
        if unvec:
            for i in range(self.num_envs):
                self.recorders.append(self._create_recorder(output_directory, prefix="{}_{}".format(i, prefix), format=format))
        else:
            self.recorders.append(self._create_recorder(output_directory, prefix, format))

        self.unvec = unvec
        self.record_obs = record_obs
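
The capture side is not shown; a purely hypothetical step_wait illustrating how reduction could throttle frame capture (the record method on the recorders is an assumed interface):

    def step_wait(self):
        obs, rewards, dones, infos = self.venv.step_wait()
        self.last -= 1
        if self.last <= 0:  # capture every `reduction`-th step
            self.last = self.reduction
            frames = obs if self.record_obs else self.venv.get_images()
            if self.unvec:
                for recorder, frame in zip(self.recorders, frames):
                    recorder.record(frame)  # assumed recorder interface
            else:
                self.recorders[0].record(frames[0])
        return obs, rewards, dones, infos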
Example #9
 def __init__(self, venv, filename=None, keep_buf=0, info_keywords=()):
     VecEnvWrapper.__init__(self, venv)
     print('init vecmonitor:', filename)
     self.eprets = None
     self.eplens = None
     self.epcount = 0
     self.tstart = time.time()
     if filename:
         self.results_writer = ResultsWriter(filename, header={'t_start': self.tstart},
             extra_keys=info_keywords)
     else:
         self.results_writer = None
     self.info_keywords = info_keywords
     self.keep_buf = keep_buf
     if self.keep_buf:
         self.epret_buf = deque([], maxlen=keep_buf)
         self.eplen_buf = deque([], maxlen=keep_buf)
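
The counters initialized here are maintained in step_wait, roughly as in Baselines' VecMonitor (a sketch; eprets/eplens are assumed to be zeroed per-env arrays after reset):

 def step_wait(self):
     obs, rews, dones, infos = self.venv.step_wait()
     self.eprets += rews
     self.eplens += 1
     for i, done in enumerate(dones):
         if done:
             epinfo = {'r': self.eprets[i], 'l': self.eplens[i],
                       't': round(time.time() - self.tstart, 6)}
             if self.keep_buf:
                 self.epret_buf.append(epinfo['r'])
                 self.eplen_buf.append(epinfo['l'])
             self.epcount += 1
             if self.results_writer:
                 self.results_writer.write_row(epinfo)
             self.eprets[i], self.eplens[i] = 0, 0
     return obs, rews, dones, infos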
Example #10
    def __init__(self,
                 venv,
                 video_folder,
                 record_video_trigger,
                 video_length=200,
                 name_prefix='rl-video'):

        VecEnvWrapper.__init__(self, venv)

        self.env = venv
        # Temp variable to retrieve metadata
        temp_env = venv

        # Unwrap to retrieve metadata dict
        # that will be used by gym recorder
        while isinstance(temp_env, (VecNormalize, VecFrameStack)):
            temp_env = temp_env.venv

        if isinstance(temp_env, (DummyVecEnv, SubprocVecEnv)):
            metadata = temp_env.get_attr('metadata')[0]
        else:
            metadata = temp_env.metadata

        self.env.metadata = metadata

        self.record_video_trigger = record_video_trigger
        self.video_recorder = None

        self.video_folder = os.path.abspath(video_folder)
        # Create output folder if needed
        os.makedirs(self.video_folder, exist_ok=True)

        self.name_prefix = name_prefix
        self.step_id = 0
        self.video_length = video_length

        self.recording = False
        self.recorded_frames = 0
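
record_video_trigger is a callable on the global step id; the start/stop logic it drives looks roughly like this (condensed from Stable Baselines' VecVideoRecorder):

    def step_wait(self):
        obs, rews, dones, infos = self.venv.step_wait()
        self.step_id += 1
        if self.recording:
            self.video_recorder.capture_frame()
            self.recorded_frames += 1
            if self.recorded_frames > self.video_length:
                self.close_video_recorder()  # finalizes the current clip
        elif self.record_video_trigger(self.step_id):
            self.start_video_recorder()  # opens a new gym VideoRecorder
        return obs, rews, dones, infos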
Example #11
 def __init__(self, venv):
     VecEnvWrapper.__init__(self, venv)
     VecMultiEnv.__init__(self, venv.num_envs, venv.num_agents,
                          venv.observation_space, venv.action_space)
Example #12
 def close(self):
     VecEnvWrapper.close(self)
     self.close_video_recorder()
Example #13
 def close(self):
     VecEnvWrapper.close(self)
Example #14
 def __init__(self, env, dtype=np.float16):
     self.dtype = dtype
     self.observation_space = gym.spaces.Box(low=0, high=1.0, shape=env.observation_space.shape, dtype=dtype)
     VecEnvWrapper.__init__(self, env, observation_space=self.observation_space)
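
The advertised range [0, 1] implies the observations get rescaled somewhere; a hypothetical step_wait under the assumption that the wrapped env emits uint8 pixels in [0, 255]:

 def step_wait(self):
     obs, rewards, dones, infos = self.venv.step_wait()
     # Assumption: raw observations are uint8 pixels in [0, 255].
     obs = np.asarray(obs).astype(self.dtype) / 255.0
     return obs, rewards, dones, infos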
Example #15
 def close(self):
     for recorder in self.recorders:
         recorder.close()
     VecEnvWrapper.close(self)
Example #16
 def __init__(self, venv, r_model):
     VecEnvWrapper.__init__(self, venv)
     assert callable(r_model)
     self.r_model = r_model
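
Only the constructor survives here; a reward-replacement step_wait consistent with it could look like this (the r_model call signature is an assumption):

 def step_wait(self):
     obs, rews, dones, infos = self.venv.step_wait()
     # Assumed signature: r_model maps a batch of observations to rewards.
     rews = self.r_model(obs)
     return obs, rews, dones, infos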