def __init__(self, low, high, shape=None, dtype=np.float32): assert dtype is not None, 'dtype must be explicitly provided. ' self.dtype = np.dtype(dtype) # determine shape if it isn't provided directly if shape is not None: shape = tuple(shape) assert np.isscalar( low ) or low.shape == shape, "low.shape doesn't match provided shape" assert np.isscalar( high ) or high.shape == shape, "high.shape doesn't match provided shape" elif not np.isscalar(low): shape = low.shape assert np.isscalar( high ) or high.shape == shape, "high.shape doesn't match low.shape" elif not np.isscalar(high): shape = high.shape assert np.isscalar( low ) or low.shape == shape, "low.shape doesn't match high.shape" else: raise ValueError( "shape must be provided or inferred from the shapes of low or high" ) if np.isscalar(low): low = np.full(shape, low, dtype=dtype) if np.isscalar(high): high = np.full(shape, high, dtype=dtype) self.shape = shape self.low = low self.high = high def _get_precision(dtype): if np.issubdtype(dtype, np.floating): return np.finfo(dtype).precision else: return np.inf low_precision = _get_precision(self.low.dtype) high_precision = _get_precision(self.high.dtype) dtype_precision = _get_precision(self.dtype) if min(low_precision, high_precision) > dtype_precision: logger.warn("Box bound precision lowered by casting to {}".format( self.dtype)) self.low = self.low.astype(self.dtype) self.high = self.high.astype(self.dtype) # Boolean arrays which indicate the interval type for each coordinate self.bounded_below = -np.inf < self.low self.bounded_above = np.inf > self.high super(Box, self).__init__(self.shape, self.dtype)
def step(self, action): err_msg = "%r (%s) invalid" % (action, type(action)) assert self.action_space.contains(action), err_msg x, x_dot, theta, theta_dot = self.state force = self.force_mag if action == 1 else -self.force_mag costheta = math.cos(theta) sintheta = math.sin(theta) # For the interested reader: # https://coneural.org/florian/papers/05_cart_pole.pdf temp = (force + self.polemass_length * theta_dot**2 * sintheta) / self.total_mass thetaacc = (self.gravity * sintheta - costheta * temp) / ( self.length * (4.0 / 3.0 - self.masspole * costheta**2 / self.total_mass)) xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass if self.kinematics_integrator == 'euler': x = x + self.tau * x_dot x_dot = x_dot + self.tau * xacc theta = theta + self.tau * theta_dot theta_dot = theta_dot + self.tau * thetaacc else: # semi-implicit euler x_dot = x_dot + self.tau * xacc x = x + self.tau * x_dot theta_dot = theta_dot + self.tau * thetaacc theta = theta + self.tau * theta_dot self.state = (x, x_dot, theta, theta_dot) done = bool(x < -self.x_threshold or x > self.x_threshold or theta < -self.theta_threshold_radians or theta > self.theta_threshold_radians) if not done: reward = 1.0 elif self.steps_beyond_done is None: # Pole just fell! self.steps_beyond_done = 0 reward = -100.0 else: if self.steps_beyond_done == 0: logger.warn( "You are calling 'step()' even though this " "environment has already returned done = True. You " "should always call 'reset()' once you receive 'done = " "True' -- any further steps are undefined behavior.") self.steps_beyond_done += 1 reward = 0.0 info = {'r': reward, 'n': 0, 'a': 0, 'p': 0} self.rewards.append(reward) self.infos.append(info) return np.array(self.state), reward, done, info
def _encode_image_frame(self, frame): if not self.encoder: self.encoder = ImageEncoder(self.path, frame.shape, self.frames_per_sec) self.metadata['encoder_version'] = self.encoder.version_info try: self.encoder.capture_frame(frame) except error.InvalidFrame as e: logger.warn( 'Tried to pass invalid video frame, marking as broken: %s', e) self.broken = True else: self.empty = False
def close_extras(self, timeout=None, terminate=False): """ Parameters ---------- timeout : int or float, optional Number of seconds before the call to `close` times out. If `None`, the call to `close` never times out. If the call to `close` times out, then all processes are terminated. terminate : bool (default: `False`) If `True`, then the `close` operation is forced and all processes are terminated. """ timeout = 0 if terminate else timeout try: if self._state != AsyncState.DEFAULT: logger.warn('Calling `close` while waiting for a pending ' 'call to `{0}` to complete.'.format( self._state.value)) function = getattr(self, '{0}_wait'.format(self._state.value)) function(timeout) except mp.TimeoutError: terminate = True if terminate: for process in self.processes: if process.is_alive(): process.terminate() else: for pipe in self.parent_pipes: if (pipe is not None) and (not pipe.closed): pipe.send(('close', None)) for pipe in self.parent_pipes: if (pipe is not None) and (not pipe.closed): pipe.recv() for pipe in self.parent_pipes: if pipe is not None: pipe.close() for process in self.processes: process.join()
def capture_frame(self): """Render the given `env` and add the resulting frame to the video.""" if not self.functional: return logger.debug('Capturing video frame: path=%s', self.path) render_mode = 'ansi' if self.ansi_mode else 'rgb_array' frame = self.env.render(mode=render_mode) if frame is None: if self._async: return else: # Indicates a bug in the environment: don't want to raise # an error here. logger.warn( 'Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path) self.broken = True else: self.last_frame = frame if self.ansi_mode: self._encode_ansi_frame(frame) else: self._encode_image_frame(frame)
def __init__(self, env_fns, observation_space=None, action_space=None, shared_memory=True, copy=True, context=None, daemon=True, worker=None): try: ctx = mp.get_context(context) except AttributeError: logger.warn('Context switching for `multiprocessing` is not ' 'available in Python 2. Using the default context.') ctx = mp self.env_fns = env_fns self.shared_memory = shared_memory self.copy = copy if (observation_space is None) or (action_space is None): dummy_env = env_fns[0]() observation_space = observation_space or dummy_env.observation_space action_space = action_space or dummy_env.action_space dummy_env.close() del dummy_env super(AsyncVectorEnv, self).__init__(num_envs=len(env_fns), observation_space=observation_space, action_space=action_space) if self.shared_memory: _obs_buffer = create_shared_memory(self.single_observation_space, n=self.num_envs, ctx=ctx) self.observations = read_from_shared_memory( _obs_buffer, self.single_observation_space, n=self.num_envs) else: _obs_buffer = None self.observations = create_empty_array( self.single_observation_space, n=self.num_envs, fn=np.zeros) self.parent_pipes, self.processes = [], [] self.error_queue = ctx.Queue() target = _worker_shared_memory if self.shared_memory else _worker target = worker or target with clear_mpi_env_vars(): for idx, env_fn in enumerate(self.env_fns): parent_pipe, child_pipe = ctx.Pipe() process = ctx.Process( target=target, name='Worker<{0}>-{1}'.format(type(self).__name__, idx), args=(idx, CloudpickleWrapper(env_fn), child_pipe, parent_pipe, _obs_buffer, self.error_queue)) self.parent_pipes.append(parent_pipe) self.processes.append(process) process.daemon = daemon process.start() child_pipe.close() self._state = AsyncState.DEFAULT self._check_observation_spaces()