def should_skip_env_spec_for_tests(spec):
    # We skip tests for envs that require dependencies or are otherwise
    # troublesome to run frequently
    ep = spec._entry_point
    # Skip mujoco tests for pull request CI
    if skip_mujoco and (ep.startswith('gym.envs.mujoco') or ep.startswith('gym.envs.robotics:')):
        return True
    try:
        import atari_py
    except ImportError:
        if ep.startswith('gym.envs.atari'):
            return True
    try:
        import Box2D
    except ImportError:
        if ep.startswith('gym.envs.box2d'):
            return True

    if ('GoEnv' in ep or
            'HexEnv' in ep or
            (ep.startswith("gym.envs.atari")
             and not spec.id.startswith("Pong")
             and not spec.id.startswith("Seaquest"))):
        logger.warn("Skipping tests for env {}".format(ep))
        return True
    return False
def step(self, action):
    assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
    state = self.state
    x, x_dot, theta, theta_dot = state
    force = self.force_mag if action == 1 else -self.force_mag
    costheta = math.cos(theta)
    sintheta = math.sin(theta)
    temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
    thetaacc = (self.gravity * sintheta - costheta * temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
    xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
    x = x + self.tau * x_dot
    x_dot = x_dot + self.tau * xacc
    theta = theta + self.tau * theta_dot
    theta_dot = theta_dot + self.tau * thetaacc
    self.state = (x, x_dot, theta, theta_dot)
    done = x < -self.x_threshold \
        or x > self.x_threshold \
        or theta < -self.theta_threshold_radians \
        or theta > self.theta_threshold_radians
    done = bool(done)

    if not done:
        reward = 1.0
    elif self.steps_beyond_done is None:
        # Pole just fell!
        self.steps_beyond_done = 0
        reward = 1.0
    else:
        if self.steps_beyond_done == 0:
            logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
        self.steps_beyond_done += 1
        reward = 0.0

    return np.array(self.state), reward, done, {}
def step(self, action):
    assert self.action_space.contains(action)
    self.last_action = action
    inp_act, out_act, pred = action
    done = False
    reward = 0.0
    self.time += 1
    assert 0 <= self.write_head_position
    if out_act == 1:
        try:
            correct = pred == self.target[self.write_head_position]
        except IndexError:
            logger.warn("It looks like you're calling step() even though this " +
                        "environment has already returned done=True. You should always call " +
                        "reset() once you receive done=True. Any further steps are undefined " +
                        "behaviour.")
            correct = False
        if correct:
            reward = 1.0
        else:
            # Bail as soon as a wrong character is written to the tape
            reward = -0.5
            done = True
        self.write_head_position += 1
        if self.write_head_position >= len(self.target):
            done = True
    self._move(inp_act)
    if self.time > self.time_limit:
        reward = -1.0
        done = True
    obs = self._get_obs()
    self.last_reward = reward
    self.episode_total_reward += reward
    return (obs, reward, done, {})
def _start(self, directory, video_callable=None, force=False, resume=False,
           write_upon_reset=False, uid=None, mode=None):
    """Start monitoring.

    Args:
        directory (str): A per-training run directory where to record stats.
        video_callable (Optional[function, False]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (for video_callable is None) is to take perfect cubes, capped at 1000. False disables video recording.
        force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
        resume (bool): Retain the training data already in this directory, which will be merged with our new data
        write_upon_reset (bool): Write the manifest file on each reset. (This is currently a JSON file, so writing it is somewhat expensive.)
        uid (Optional[str]): A unique id used as part of the suffix for the file. By default, uses os.getpid().
        mode (['evaluation', 'training']): Whether this is an evaluation or training episode.
    """
    if self.env.spec is None:
        logger.warn("Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users.")
        env_id = '(unknown)'
    else:
        env_id = self.env.spec.id

    if not os.path.exists(directory):
        logger.info('Creating monitor directory %s', directory)
        if six.PY3:
            os.makedirs(directory, exist_ok=True)
        else:
            os.makedirs(directory)

    if video_callable is None:
        video_callable = capped_cubic_video_schedule
    elif video_callable == False:
        video_callable = disable_videos
    elif not callable(video_callable):
        raise error.Error('You must provide a function, None, or False for video_callable, not {}: {}'.format(type(video_callable), video_callable))
    self.video_callable = video_callable

    # Check on whether we need to clear anything
    if force:
        clear_monitor_files(directory)
    elif not resume:
        training_manifests = detect_training_manifests(directory)
        if len(training_manifests) > 0:
            raise error.Error('''Trying to write to monitor directory {} with existing monitor files: {}. You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''.format(directory, ', '.join(training_manifests[:5])))

    self._monitor_id = monitor_closer.register(self)

    self.enabled = True
    self.directory = os.path.abspath(directory)
    # We use the 'openai-gym' prefix to determine if a file is
    # ours
    self.file_prefix = FILE_PREFIX
    self.file_infix = '{}.{}'.format(self._monitor_id, uid if uid else os.getpid())

    self.stats_recorder = stats_recorder.StatsRecorder(directory, '{}.episode_batch.{}'.format(self.file_prefix, self.file_infix), autoreset=self.env_semantics_autoreset, env_id=env_id)

    if not os.path.exists(directory):
        os.mkdir(directory)

    self.write_upon_reset = write_upon_reset

    if mode is not None:
        self._set_mode(mode)
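# Hedged usage sketch for the options documented in _start() above, assuming the
# legacy gym.wrappers.Monitor wrapper (removed from later gym releases); the
# directory path and recording schedule below are illustrative only.
import gym
from gym.wrappers import Monitor

env = gym.make("CartPole-v1")
env = Monitor(env,
              directory="/tmp/cartpole-monitor",             # per-training-run stats directory
              video_callable=lambda ep_id: ep_id % 10 == 0,  # record every 10th episode
              force=True)                                    # clear old "openaigym." files
env.reset()
env.close()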
def _encode_image_frame(self, frame):
    if not self.encoder:
        self.encoder = ImageEncoder(self.path, frame.shape, self.frames_per_sec)
        self.metadata['encoder_version'] = self.encoder.version_info

    try:
        self.encoder.capture_frame(frame)
    except error.InvalidFrame as e:
        logger.warn('Tried to pass invalid video frame, marking as broken: %s', e)
        self.broken = True
    else:
        self.empty = False
def should_skip_env_spec_for_tests(spec):
    # We skip tests for envs that require dependencies or are otherwise
    # troublesome to run frequently
    ep = spec._entry_point
    # Skip mujoco tests for pull request CI
    skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or os.path.exists(os.path.expanduser('~/.mujoco')))
    if skip_mujoco and ep.startswith('gym.envs.mujoco:'):
        return True
    if ('GoEnv' in ep or
            'HexEnv' in ep or
            (ep.startswith("gym.envs.atari")
             and not spec.id.startswith("Pong")
             and not spec.id.startswith("Seaquest"))):
        logger.warn("Skipping tests for env {}".format(ep))
        return True
    return False
def seed(self, seed=None):
    """Sets the seed for this env's random number generator(s).

    Note:
        Some environments use multiple pseudorandom number generators.
        We want to capture all such seeds used in order to ensure that
        there aren't accidental correlations between multiple generators.

    Returns:
        list<bigint>: Returns the list of seeds used in this env's random
          number generators. The first value in the list should be the
          "main" seed, or the value which a reproducer should pass to
          'seed'. Often, the main seed equals the provided 'seed', but
          this won't be true if seed=None, for example.
    """
    logger.warn("Could not seed environment %s", self)
    return
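# A minimal sketch of the seeding pattern that environments typically use to
# override this default no-op seed(), based on gym.utils.seeding (assumed
# available in this gym version); MyEnv is a hypothetical subclass.
import gym
from gym.utils import seeding

class MyEnv(gym.Env):
    def seed(self, seed=None):
        # Create the env's main RNG and return the seed actually used.
        self.np_random, seed = seeding.np_random(seed)
        return [seed]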
def patch_deprecated_methods(env):
    """
    Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
    For backward compatibility, this makes it possible to work with unmodified environments.
    """
    global warn_once
    if warn_once:
        logger.warn("Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior." % str(type(env)))
        warn_once = False
    env.reset = env._reset
    env.step = env._step
    env.seed = env._seed

    def render(mode):
        return env._render(mode, close=False)

    def close():
        env._render("human", close=True)

    env.render = render
    env.close = close
def __init__(self, low=None, high=None, shape=None, dtype=np.float32):
    """
    Two kinds of valid input:
        Box(low=-1.0, high=1.0, shape=(3,4))  # low and high are scalars, and shape is provided
        Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0]))  # low and high are arrays of the same shape
    """
    if shape is None:
        assert low.shape == high.shape
        shape = low.shape
    else:
        assert np.isscalar(low) and np.isscalar(high)
        low = low + np.zeros(shape)
        high = high + np.zeros(shape)

    self.low = low.astype(dtype)
    self.high = high.astype(dtype)
    if (self.high == 255).all() and dtype != np.uint8:
        logger.warn('Box constructor got high=255 but dtype!=uint8')
    Space.__init__(self, shape, dtype)
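# A short usage sketch for the two construction forms described in the docstring
# above (assumes a gym version whose Box accepts scalar low/high plus an explicit shape).
import numpy as np
from gym.spaces import Box

scalar_box = Box(low=-1.0, high=1.0, shape=(3, 4), dtype=np.float32)
array_box = Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)

print(scalar_box.sample().shape)                                   # (3, 4)
print(array_box.contains(np.array([0.0, 1.0], dtype=np.float32)))  # True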
def step(self, action):
    if self.curr_episode > self.episodes:
        logger.warn(
            "You are calling 'step()' even though this environment has already returned done = True. You should always call 'initialize()' and 'reset()' once you receive 'done = True'"
        )
        return
    action = np.array(action)
    if action.shape != (self.numAgents, 2):
        logger.error(
            "Incorrect dimensions of action. Action must have destination position for each agent"
        )
        return
    self.curr_episode += 1
    reward = np.zeros(self.numAgents)
    self.agentPosIncrements = np.array([(-1000.0, -1000.0)] * self.numAgents)
    agentReachedDest = [False] * self.numAgents
    for _ in xrange(self.updateRate):
        self.curr_step += 1
        # Move targets
        for i in xrange(self.numTargets):
            self.moveTarget(i)
        # Move agent
        for i in xrange(self.numAgents):
            if not agentReachedDest[i]:
                agentReachedDest[i] = self.moveAgent(i, action[i])
            else:
                # Already reached. Removes precision errors
                self.agentLocations[i] = action[i].astype('float32')
        # Calculate reward at this step
        reward += self.calculateAgentRewards()[0]
    if self.viewer is not None:
        self.render()
    return self.reset(), reward, self.curr_episode >= self.episodes, {}
def close_extras(self, timeout=None, terminate=False):
    """
    Parameters
    ----------
    timeout : int or float, optional
        Number of seconds before the call to `close` times out. If `None`,
        the call to `close` never times out. If the call to `close` times
        out, then all processes are terminated.

    terminate : bool (default: `False`)
        If `True`, then the `close` operation is forced and all processes
        are terminated.
    """
    timeout = 0 if terminate else timeout
    try:
        if self._state != AsyncState.DEFAULT:
            logger.warn('Calling `close` while waiting for a pending '
                        'call to `{0}` to complete.'.format(self._state.value))
            function = getattr(self, '{0}_wait'.format(self._state.value))
            function(timeout)
    except mp.TimeoutError:
        terminate = True

    if terminate:
        for process in self.processes:
            if process.is_alive():
                process.terminate()
    else:
        for pipe in self.parent_pipes:
            if (pipe is not None) and (not pipe.closed):
                pipe.send(('close', None))
        for pipe in self.parent_pipes:
            if (pipe is not None) and (not pipe.closed):
                pipe.recv()

    for pipe in self.parent_pipes:
        if pipe is not None:
            pipe.close()
    for process in self.processes:
        process.join()
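# Hedged usage sketch: VectorEnv.close(**kwargs) forwards its keyword arguments to
# close_extras() above, so a timeout can be passed straight through (assumes
# gym.vector.AsyncVectorEnv; the env id and worker count are illustrative).
import gym
from gym.vector import AsyncVectorEnv

envs = AsyncVectorEnv([lambda: gym.make("CartPole-v1") for _ in range(4)])
envs.reset()
envs.close(timeout=5)  # workers that do not exit within 5 s are terminated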
def step(self, action):
    """
    Implementation of the classic “agent-environment loop”.

    Args:
        action (object): the board

    Returns:
        observation (object)
        reward (float)
        done (boolean)
        info (dict)
    """
    # assert self.action_space.contains(action), f"{action} ({type(action)})"
    reward = 0
    info = {
        'turn': self.game.turns_count,
        'move_type': None,
        'player': self.game.current_player,
        'player_name': ['white', 'black'][self.game.current_player]
    }

    if self.done:
        logger.warn(
            "You are calling 'step()' even though this environment has already returned done = True. "
            "You should always call 'reset()' once you receive 'done = True' "
            "-- any further steps are undefined behavior.")
    else:
        pos0, pos1 = action
        move_check = self.game.action_handler(pos0, pos1, return_modif=True)
        if move_check:  # if the move is a valid move
            move_type, self._modifications = move_check
            reward = Reward.method_1(self.game.board, move_type)
            # for debug
            info['move_type'] = move_type

    return self.observation, reward, self.done, info
def step(self, a):
    """
    @brief Run a simulation step for a given action.

    @param[in] a The action to perform (in the action space rather than
                 the original torque space).

    @return The next observation, the reward, the status of the simulation
            (done or not), and a dictionary of extra information
    """
    if self.continuous:
        torque = a
    else:
        torque = self.AVAIL_TORQUE[a] * self.torque_mag

    # Add noise to the force action
    if self.torque_noise_max > 0:
        torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)

    # Bypass 'self.engine_py.step' method and use direct assignment to max out the performances
    self.engine_py._action[0] = torque
    self.engine_py.step(dt_desired=self.dt)
    self.state = self.engine_py.state

    # Get information
    info, obs = self._get_info()
    done = info['is_success']

    # Make sure the simulation is not already over
    if done:
        if self.steps_beyond_done is None:
            self.steps_beyond_done = 0
        else:
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1

    # Compute the reward
    reward = self.compute_reward(obs['achieved_goal'], self.goal, info)

    return obs, reward, done, info
def step(self, action):
    thrust = action[0]  # Thrust command
    w = action[1]       # Angular velocity command

    state = self.state
    ref_pos = self.ref_pos
    ref_vel = self.ref_vel

    pos = np.array([state[0], state[1]]).flatten()
    att = np.array([state[2]]).flatten()
    vel = np.array([state[3], state[4]]).flatten()

    acc = thrust / self.mass * np.array([cos(att + pi / 2), sin(att + pi / 2)]) + self.g

    pos = pos + vel * self.dt + 0.5 * acc * self.dt * self.dt
    vel = vel + acc * self.dt
    att = att + w * self.dt

    self.state = (pos[0], pos[1], att, vel[0], vel[1])

    done = linalg.norm(pos, 2) < -self.pos_threshold \
        or linalg.norm(pos, 2) > self.pos_threshold \
        or linalg.norm(vel, 2) < -self.vel_threshold \
        or linalg.norm(vel, 2) > self.vel_threshold
    done = bool(done)

    if not done:
        reward = (-linalg.norm(pos, 2))
    elif self.steps_beyond_done is None:
        # Pole just fell!
        self.steps_beyond_done = 0
        reward = 1.0
    else:
        if self.steps_beyond_done == 0:
            logger.warn(
                "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
            )
        self.steps_beyond_done += 1
        reward = 0.0

    return np.array(self.state), reward, done, {}
def step(self, action):
    assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))

    # state = np.array(self.state).reshape(4, 4)
    goal_pos = list(zip(*np.where(self.state == 1)))[0]
    agent_pos = list(zip(*np.where(self.state == 3)))[0]

    # move
    new_agent_pos = np.array(agent_pos)
    if action == 0:
        new_agent_pos[1] -= 1
    elif action == 1:
        new_agent_pos[1] += 1
    elif action == 2:
        new_agent_pos[0] -= 1
    elif action == 3:
        new_agent_pos[0] += 1  # fixed: action 3 should move opposite to action 2
    new_agent_pos = np.clip(new_agent_pos, 0, 3)
    self.state[agent_pos[0], agent_pos[1]] = 0  # moved from this position so it is empty
    self.state[new_agent_pos[0], new_agent_pos[1]] = 3  # moved to this position
    # self.state = tuple(self.state.flatten())

    # check if done
    done = False
    if tuple(new_agent_pos) == goal_pos:  # fixed: compare like types (goal_pos is a tuple)
        done = True

    # assign reward
    if not done:
        reward = 0
    elif self.steps_beyond_done is None:
        # Just arrived at the goal
        self.steps_beyond_done = 0
        reward = 1
    else:
        if self.steps_beyond_done == 0:
            logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
        self.steps_beyond_done += 1
        reward = 0.0

    return self.state, reward, done, {}
def step(self, action):
    """
    @brief Run a simulation step for a given action.

    @param[in] action The action to perform in the action space.
                      Set to None to NOT update the action.

    @return The next observation, the reward, the status of the episode
            (done or not), and a dictionary of extra information
    """
    # Bypass 'self.engine_py.action' setter and use
    # direct assignment to max out the performances
    if action is None:
        action = self.action_prev
    self.engine_py._action[:] = action
    self.engine_py.step(dt_desired=self.dt)
    self.is_running = True
    self.action_prev = action

    # Extract information about the current simulation state
    self._update_observation(self.observation)
    done = self._is_done()
    self.learning_info = {'is_success': done}
    reward = self._compute_reward()

    # Make sure the simulation is not already over
    if done:
        if self._steps_beyond_done is None:
            self._steps_beyond_done = 0
        else:
            if self._steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already "
                    "returned done = True. You should always call 'reset()' once you "
                    "receive 'done = True' -- any further steps are undefined behavior.")
            self._steps_beyond_done += 1

    return self.observation, reward, done, self.learning_info
def _step(self, action):
    assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
    state = self.state
    reward = 0.0
    if state == 0:
        if action == 0:
            if self.np_random.rand() < self.signal_prob:
                state = self.n_press
        else:
            reward = -1.0
    else:
        if action == 1:
            # reducing state to 0
            state -= 1
            if state == 0:
                # this transition is rewarded
                reward = self.reward_seq_complete
        else:
            # if not pressing, then move directly to 0
            state = 0
    signal = [float(state == self.n_press)]
    self.state = state
    done = False
    done = bool(done)

    if not done:
        pass
    elif self.steps_beyond_done is None:
        # Pole just fell!
        self.steps_beyond_done = 0
    else:
        if self.steps_beyond_done == 0:
            logger.warn(
                "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
            )
        self.steps_beyond_done += 1

    return np.array(signal), reward, done, {}
def step(self, action):
    self._take_action(action)
    traci.simulationStep()
    self.current_step += 1

    obs = self._next_observation()
    reward = self._get_reward()

    if self.is_done:
        logger.warn(
            "You are calling 'step()' even though this environment has already returned done = True. "
            "You should always call 'reset()' once you receive 'done = True' "
            "-- any further steps are undefined behavior.")
        reward = 0.0

    if self.current_step + 1 == self.steps_per_episode:
        self.is_done = True

    return obs, reward, self.is_done, {}
def render(self, mode='matplotlib'):
    """Renders this environment in its current state.

    Note that, in order to support rendering, `render=True` must be passed
    to the environment constructor.

    Arguments:
        mode (str): Rendering mode. Currently, only `"matplotlib"` is supported.
    """
    if mode == 'matplotlib' and self._render:
        self._painter.draw()
    elif not self._render:
        logger.warn('Need to pass `render=True` to support rendering.')
    else:
        logger.warn('Invalid rendering mode "%s". Only "matplotlib" is supported.', mode)
def test_env_semantics(spec):
    logger.warn("Skipping this test. Existing hashes were generated in a bad way")
    return
    with open(ROLLOUT_FILE) as data_file:
        rollout_dict = json.load(data_file)

    if spec.id not in rollout_dict:
        if not spec.nondeterministic:
            logger.warn("Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(spec.id))
        return

    logger.info("Testing rollout for {} environment...".format(spec.id))

    observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(spec)

    errors = []
    if rollout_dict[spec.id]['observations'] != observations_now:
        errors.append('Observations not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['observations'], observations_now))
    if rollout_dict[spec.id]['actions'] != actions_now:
        errors.append('Actions not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['actions'], actions_now))
    if rollout_dict[spec.id]['rewards'] != rewards_now:
        errors.append('Rewards not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['rewards'], rewards_now))
    if rollout_dict[spec.id]['dones'] != dones_now:
        errors.append('Dones not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['dones'], dones_now))
    if len(errors):
        for error in errors:
            logger.warn(error)
        raise ValueError(errors)
def _check_spec_register(spec: EnvSpec):
    """Checks whether the spec is valid to be registered. Helper function for `register`."""
    global registry, current_namespace
    if current_namespace is not None:
        if spec.namespace is not None:
            logger.warn(
                f"Custom namespace `{spec.namespace}` is being overridden "
                f"by namespace `{current_namespace}`. If you are developing a "
                "plugin you shouldn't specify a namespace in `register` "
                "calls. The namespace is specified through the "
                "entry point package metadata.")

    latest_versioned_spec = max(
        (spec_ for spec_ in registry.values()
         if spec_.namespace == spec.namespace
         and spec_.name == spec.name
         and spec_.version is not None),
        key=lambda spec_: int(spec_.version),  # type: ignore
        default=None,
    )

    unversioned_spec = next(
        (spec_ for spec_ in registry.values()
         if spec_.namespace == spec.namespace
         and spec_.name == spec.name
         and spec_.version is None),
        None,
    )

    if unversioned_spec is not None and spec.version is not None:
        raise error.RegistrationError(
            "Can't register the versioned environment "
            f"`{spec.id}` when the unversioned environment "
            f"`{unversioned_spec.id}` of the same name already exists.")
    elif latest_versioned_spec is not None and spec.version is None:
        raise error.RegistrationError(
            "Can't register the unversioned environment "
            f"`{spec.id}` when the versioned environment "
            f"`{latest_versioned_spec.id}` of the same name "
            f"already exists. Note: the default behavior is "
            f"that `gym.make` with the unversioned environment "
            f"will return the latest versioned environment")
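# Illustrative sketch of the rule enforced above: versioned and unversioned ids of
# the same name cannot coexist in the registry (the ids and entry point below are
# hypothetical).
from gym.envs.registration import register

register(id="MyNamespace/MyEnv-v0", entry_point="my_pkg.envs:MyEnv")
# Registering the unversioned id "MyNamespace/MyEnv" afterwards would raise
# error.RegistrationError, because the versioned "MyNamespace/MyEnv-v0" already exists.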
def _check_nan(env: gym.Env, check_inf: bool = True) -> None:
    """Check for NaN and Inf."""
    for _ in range(10):
        action = env.action_space.sample()
        observation, reward, _, _ = env.step(action)
        if np.any(np.isnan(observation)):
            logger.warn("Encountered NaN value in observations.")
        if np.any(np.isnan(reward)):
            logger.warn("Encountered NaN value in rewards.")
        if check_inf and np.any(np.isinf(observation)):
            logger.warn("Encountered inf value in observations.")
        if check_inf and np.any(np.isinf(reward)):
            logger.warn("Encountered inf value in rewards.")
def load_env_plugins(entry_point: str = "gym.envs") -> None: # Load third-party environments for plugin in metadata.entry_points(group=entry_point): # Python 3.8 doesn't support plugin.module, plugin.attr # So we'll have to try and parse this ourselves try: module, attr = plugin.module, plugin.attr # type: ignore ## error: Cannot access member "attr" for type "EntryPoint" except AttributeError: if ":" in plugin.value: module, attr = plugin.value.split(":", maxsplit=1) else: module, attr = plugin.value, None except: module, attr = None, None finally: if attr is None: raise error.Error( f"Gym environment plugin `{module}` must specify a function to execute, not a root module" ) context = namespace(plugin.name) if plugin.name.startswith("__") and plugin.name.endswith("__"): # `__internal__` is an artifact of the plugin system when # the root namespace had an allow-list. The allow-list is now # removed and plugins can register environments in the root # namespace with the `__root__` magic key. if plugin.name == "__root__" or plugin.name == "__internal__": context = contextlib.nullcontext() else: logger.warn( f"The environment namespace magic key `{plugin.name}` is unsupported. " "To register an environment at the root namespace you should specify " "the `__root__` namespace.") with context: fn = plugin.load() try: fn() except Exception as e: logger.warn(str(e))
def step(self, action):
    assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
    state = self.state

    # Create a prey with probability 1%
    if not self.prey.tolist():
        px = np.float(random.randint(-.5*self.world_width, .5*self.world_width - 1))
        py = np.float(random.randint(-.5*self.world_height, .5*self.world_height - 1))
        while [px, py] in self.snake.blocks.tolist():
            px = np.float(random.randint(-.5*self.world_width, .5*self.world_width - 1))
            py = np.float(random.randint(-.5*self.world_height, .5*self.world_height - 1))
        self.prey = np.array([px, py])
        logger.info("[INFO] -- New Prey at {}, {} ".format(px, py))

    # print(self.snake.blocks[0].tolist())
    if self.snake.blocks[0].tolist() in [self.prey.tolist()]:
        self.snake.eat_and_move(action)
        self.state = np.array([self.get_state()])
        self.prey = np.array([])
        logger.info("[INFO] -- Manger")
        reward = 500.
    else:
        self.snake.move(action)
        reward = -.5
        self.state = np.array([self.get_state()])

    done = self.snake.is_dead or self.oob(*self.snake.blocks[0])

    if done:
        logger.warn("DONE")
        if self.steps_beyond_done is None:
            self.steps_beyond_done = 0
            reward = -1000
        else:
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' but it's already done !")
            self.steps_beyond_done += 1

    return self.state, reward, done, {}
def step(self, action):
    assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
    state = self.state

    # TODO: rename...It's not really delta.
    action_0 = action[0]

    next_state = np.copy(state)
    if action_0 != 0:
        next_state[0] = action_0
    else:
        # The last action_input is do nothing.
        pass

    # After intervention, the state evolves into the next following the transition prob.
    next_state[0] = (next_state[0] + 1) % 10
    # next_second_digit = next_first_digit % 2
    next_state[1] = (next_state[1] + 1) % 2

    assert self._observed_state_space.contains(next_state), 'internal error. Illegal next state'
    self.state = next_state

    self.step_count += 1
    if self.step_count >= self.max_num_steps:
        if self.steps_beyond_done is None:
            self.steps_beyond_done = 0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    'You are calling \'step()\' even though this environment has already returned done = True. You should '
                    'always call \'reset()\' once you receive \'done = True\' -- any further steps are undefined behavior.'
                )
            self.steps_beyond_done += 1

    done = self._get_is_done()
    info = {'done': done, 'steps_beyond_done': self.steps_beyond_done}
    reward = self.compute_reward(self._get_achieved_goal(), self._get_desired_goal(), done)
    return self._get_observation(), reward, done, info
def step(self, action):
    # TODO: assert action is a scalar
    # assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))

    # get state
    x, th, x_dot, th_dot = self.state
    theta = self._unwrap_angle(th)

    # clip torque, update dynamics
    u = np.clip(action, -self.force_mag, self.force_mag)
    acc = self._accels(anp.array([x, th, x_dot, th_dot, u]))

    # integrate
    xacc, thacc = acc[0], acc[1]
    x_dot = x_dot + self.tau * xacc
    x = x + self.tau * x_dot
    th_dot = th_dot + self.tau * thacc
    th = th + self.tau * th_dot + 0.5 * self.tau**2 * thacc

    # update state
    self._unwrap_angle(th)
    self.state = np.array([x, th, x_dot, th_dot])
    done = self.is_done()

    if not done:
        reward = 1.0
    elif self.steps_beyond_done is None:
        # Pole just fell!
        self.steps_beyond_done = 0
        reward = 1.0
    else:
        if self.steps_beyond_done == 0:
            logger.warn(
                "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
            )
        self.steps_beyond_done += 1
        reward = 0.0

    return self.state, reward, done, {}
def _step(self, action):
    assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
    state = self.state
    reward = 0.0
    if action == 0:  # poke
        if state <= 0:
            # reward and reset state
            reward = self.reward_seq_complete
            state = self.n_press
    elif action == 1:  # press
        state -= 1
        state = max(0, state)
    else:
        raise ValueError
    self.state = state
    done = False

    if not done:
        pass
    elif self.steps_beyond_done is None:
        # Pole just fell!
        self.steps_beyond_done = 0
    else:
        if self.steps_beyond_done == 0:
            logger.warn(
                "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
            )
        self.steps_beyond_done += 1

    if self.observe_state:
        obs = np.array([self.state])
    else:
        obs = np.array([1.])
    return obs, reward, done, {}
def capture_frame(self):
    """Render the given `env` and add the resulting frame to the video."""
    if not self.functional:
        return
    logger.debug('Capturing video frame: path=%s', self.path)

    render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
    frame = self.env.render(mode=render_mode)

    if frame is None:
        if self._async:
            return
        else:
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
            self.broken = True
    else:
        self.last_frame = frame
        if self.ansi_mode:
            self._encode_ansi_frame(frame)
        else:
            self._encode_image_frame(frame)
def step(self, action):
    state = self.state
    x, x_dot, theta, theta_dot = state
    force = self.force_mag * action
    costheta = math.cos(theta)
    sintheta = math.sin(theta)
    temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
    thetaacc = (self.gravity * sintheta - costheta * temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
    xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
    if self.kinematics_integrator == 'euler':
        x = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
    else:  # semi-implicit euler
        x_dot = x_dot + self.tau * xacc
        x = x + self.tau * x_dot
        theta_dot = theta_dot + self.tau * thetaacc
        theta = theta + self.tau * theta_dot
    self.state = (x, x_dot, theta, theta_dot)
    done = x < -self.x_threshold \
        or x > self.x_threshold \
        or theta < -self.theta_threshold_radians \
        or theta > self.theta_threshold_radians
    done = bool(done)

    if not done:
        reward = 1.0
    elif self.steps_beyond_done is None:
        # Pole just fell!
        self.steps_beyond_done = 0
        reward = 1.0
    else:
        if self.steps_beyond_done == 0:
            logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
        self.steps_beyond_done += 1
        reward = 0.0

    return np.array(self.state), reward, done, {}
def __init__(self, low=None, high=None, shape=None, dtype=None):
    """
    Two kinds of valid input:
        Box(low=-1.0, high=1.0, shape=(3,4))  # low and high are scalars, and shape is provided
        Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0]))  # low and high are arrays of the same shape
    """
    if shape is None:
        assert low.shape == high.shape
        shape = low.shape
    else:
        assert np.isscalar(low) and np.isscalar(high)
        low = low + np.zeros(shape)
        high = high + np.zeros(shape)

    if dtype is None:  # Autodetect type
        if (high == 255).all():
            dtype = np.uint8
        else:
            dtype = np.float32
        logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype)
    self.low = low.astype(dtype)
    self.high = high.astype(dtype)
    gym.Space.__init__(self, shape, dtype)
def step(self, action):
    assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
    state = self.state
    x, x_dot, theta, theta_dot = state
    force = self.force_mag if action == 1 else -self.force_mag
    costheta = math.cos(theta)
    sintheta = math.sin(theta)
    temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
    thetaacc = (self.gravity * sintheta - costheta * temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
    xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
    if self.kinematics_integrator == 'euler':
        x = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
    else:  # semi-implicit euler
        x_dot = x_dot + self.tau * xacc
        x = x + self.tau * x_dot
        theta_dot = theta_dot + self.tau * thetaacc
        theta = theta + self.tau * theta_dot
    self.state = (x, x_dot, theta, theta_dot)
    done = x < -self.x_threshold \
        or x > self.x_threshold
    done = bool(done)

    distance_from_desired_angle = (theta - self.desired_angle) / self.flexibility

    if not done:
        reward = math.exp(-distance_from_desired_angle*distance_from_desired_angle/2)
    elif self.steps_beyond_done is None:
        # Pole just fell!
        self.steps_beyond_done = 0
        reward = math.exp(-distance_from_desired_angle*distance_from_desired_angle/2)
    else:
        if self.steps_beyond_done == 0:
            logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
        self.steps_beyond_done += 1
        reward = 0.0

    return np.array(self.state), reward, done, {}
def step(self, action):
    assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))

    self.cityflow.set_tl_phase(self.intersection_id, action)
    self.cityflow.next_step()

    state = self._get_state()
    reward = self._get_reward()

    self.current_step += 1

    if self.is_done:
        logger.warn(
            "You are calling 'step()' even though this environment has already returned done = True. "
            "You should always call 'reset()' once you receive 'done = True' "
            "-- any further steps are undefined behavior.")
        reward = 0.0

    if self.current_step + 1 == self.steps_per_episode:
        self.is_done = True

    return state, reward, self.is_done, {}
def _check_render(
    env: gym.Env, warn: bool = True, headless: bool = False
) -> None:  # pragma: no cover
    """
    Check the declared render modes/fps and the `render()`/`close()`
    method of the environment.

    :param env: The environment to check
    :param warn: Whether to output additional warnings
    :param headless: Whether to disable render modes
        that require a graphical interface. False by default.
    """
    render_modes = env.metadata.get("render_modes")
    if render_modes is None:
        if warn:
            logger.warn(
                "No render modes was declared in the environment "
                " (env.metadata['render_modes'] is None or not defined), "
                "you may have trouble when calling `.render()`"
            )

    render_fps = env.metadata.get("render_fps")
    # We only require `render_fps` if rendering is actually implemented
    if render_fps is None and render_modes is not None and len(render_modes) > 0:
        if warn:
            logger.warn(
                "No render fps was declared in the environment "
                " (env.metadata['render_fps'] is None or not defined), "
                "rendering may occur at inconsistent fps"
            )
    else:
        # Don't check render mode that require a
        # graphical interface (useful for CI)
        if headless and "human" in render_modes:
            render_modes.remove("human")
        # Check all declared render modes
        for render_mode in render_modes:
            env.render(mode=render_mode)
        env.close()
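# Sketch of the metadata a custom environment would declare so that the checks above
# find both render modes and fps (the class name and values are hypothetical).
import gym
import numpy as np

class MyRenderableEnv(gym.Env):
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    def render(self, mode="human"):
        # Return an RGB array for "rgb_array"; a real env would open a window for "human".
        return np.zeros((64, 64, 3), dtype=np.uint8) if mode == "rgb_array" else None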
def step(self, action):
    if action <= -10.0 or action >= 10.0:  # fixed: the original `and` made this check unsatisfiable
        raise Exception

    reward = self.reward(self.state, action)

    if action != 0.0:
        self.steps_without_correct_action += 1.0

    done = self.steps_without_correct_action >= 20

    if done:
        if self.steps_beyond_done is None:
            self.steps_beyond_done = 0
        elif self.steps_beyond_done == 0:
            logger.warn(
                "You are calling 'step()' even though this "
                "environment has already returned done = True. You "
                "should always call 'reset()' once you receive 'done = "
                "True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1

    return np.array(self.state), reward, done, {}
def step(self, action):
    action = np.clip(action, self.action_space.low, self.action_space.high)
    ds = action[0]
    dtheta = action[1]

    x, y, theta = self.state

    # update theta and keep normalised to [0, 2pi] range
    theta = (theta + dtheta) % (2 * math.pi)

    # update position
    x = x + math.cos(theta) * ds
    y = y + math.sin(theta) * ds

    wall_collision = self.is_colliding(x, y, 1)
    if not wall_collision:
        self.state[0] = x
        self.state[1] = y
        self.state[2] = theta

    done = self.is_colliding(self.state[0], self.state[1], 'r')
    reward = -0.1

    if done and self.steps_beyond_done is None:
        # solved the maze!
        reward += 100.0
        self.steps_beyond_done = 0
    elif self.steps_beyond_done is not None:
        if self.steps_beyond_done == 0:
            logger.warn(
                "You are calling 'step()' even though this "
                "environment has already returned done = True. You "
                "should always call 'reset()' once you receive 'done = "
                "True' -- any further steps are undefined behavior.")
        self.steps_beyond_done += 1

    return self.normalised_state(), reward, done, {}
def step(self, action):
    if not self.done:
        # MDP Transition
        new_grid, new_context = self.coordinator(self.grid, action, self.context)

        # New State
        self.grid = new_grid
        self.context = new_context

        # Termination as a function of New State
        self._is_done()

        # API Formatting
        # Necessary condition for MDP, its New State is public
        obs = new_grid, new_context
        # Reward as a function of New State
        reward = self._award()
        info = self._report()

        return obs, reward, self.done, info
    else:
        if self.steps_beyond_done == 0:
            logger.warn(
                "You are calling 'step()' even though this "
                "environment has already returned done = True. You "
                "should always call 'reset()' once you receive 'done = "
                "True' -- any further steps are undefined behavior.")
        self.steps_beyond_done += 1

        # Graceful after termination
        return (self.grid, self.context), 0.0, True, {}
def __init__(
    self,
    env,
    video_folder: str,
    episode_trigger: Callable[[int], bool] = None,
    step_trigger: Callable[[int], bool] = None,
    video_length: int = 0,
    name_prefix: str = "rl-video",
):
    super().__init__(env)

    if episode_trigger is None and step_trigger is None:
        episode_trigger = capped_cubic_video_schedule

    trigger_count = sum(x is not None for x in [episode_trigger, step_trigger])
    assert trigger_count == 1, "Must specify exactly one trigger"

    self.episode_trigger = episode_trigger
    self.step_trigger = step_trigger
    self.video_recorder = None

    self.video_folder = os.path.abspath(video_folder)
    # Create output folder if needed
    if os.path.isdir(self.video_folder):
        logger.warn(
            f"Overwriting existing videos at {self.video_folder} folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)"
        )
    os.makedirs(self.video_folder, exist_ok=True)

    self.name_prefix = name_prefix
    self.step_id = 0
    self.video_length = video_length

    self.recording = False
    self.recorded_frames = 0
    self.is_vector_env = getattr(env, "is_vector_env", False)
    self.episode_id = 0
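# Hedged usage sketch for the wrapper configured above, using the episode_trigger
# variant (exactly one trigger may be set); the folder and schedule are illustrative.
import gym
from gym.wrappers import RecordVideo

env = gym.make("CartPole-v1")
env = RecordVideo(env,
                  video_folder="/tmp/cartpole-videos",
                  episode_trigger=lambda ep_id: ep_id % 100 == 0,
                  name_prefix="rl-video")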
def deprecated_warn_once(text):
    global warn_once
    if not warn_once:
        return
    warn_once = False
    logger.warn(text)
import gym
import pygame
import matplotlib
import argparse
from gym import logger

try:
    matplotlib.use('TkAgg')
    import matplotlib.pyplot as plt
except ImportError as e:
    logger.warn('failed to set matplotlib backend, plotting will not work: %s' % str(e))
    plt = None

from collections import deque
from pygame.locals import VIDEORESIZE


def display_arr(screen, arr, video_size, transpose):
    arr_min, arr_max = arr.min(), arr.max()
    arr = 255.0 * (arr - arr_min) / (arr_max - arr_min)
    pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr)
    pyg_img = pygame.transform.scale(pyg_img, video_size)
    screen.blit(pyg_img, (0, 0))


def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None):
    """Allows one to play the game using keyboard.

    To simply play the game use:

        play(gym.make("Pong-v4"))

    Above code works also if env is wrapped, so it's particularly useful in
    verifying that the frame-level preprocessing does not render the game