def should_skip_env_spec_for_tests(spec):
    """Decide whether the test-suite should skip the env described by ``spec``.

    A spec is skipped when an optional dependency it needs is unavailable
    (MuJoCo via the ``skip_mujoco`` flag, ``atari_py``, ``Box2D``) or when it
    is deliberately excluded: Go/Hex envs and every Atari env whose id does
    not start with "Pong" or "Seaquest".

    Args:
        spec: environment spec; ``spec._entry_point`` (used as a string) and
            ``spec.id`` are read.

    Returns:
        bool: True if tests for this spec should be skipped, False otherwise.
    """
    # We skip tests for envs that require dependencies or are otherwise
    # troublesome to run frequently
    ep = spec._entry_point
    # Skip mujoco tests for pull request CI
    # (`skip_mujoco` is not assigned here; it is expected at module level.)
    if skip_mujoco and ep.startswith(('gym.envs.mujoco', 'gym.envs.robotics:')):
        return True
    # The imports below only probe whether the optional packages are installed.
    try:
        import atari_py  # noqa: F401
    except ImportError:
        if ep.startswith('gym.envs.atari'):
            return True
    try:
        import Box2D  # noqa: F401
    except ImportError:
        if ep.startswith('gym.envs.box2d'):
            return True

    excluded = (
        'GoEnv' in ep or
        'HexEnv' in ep or
        (ep.startswith("gym.envs.atari") and not spec.id.startswith(("Pong", "Seaquest")))
    )
    if excluded:
        logger.warn("Skipping tests for env {}".format(ep))
        return True
    return False
    def step(self, action):
        """Advance the cart-pole simulation by one time step of length ``self.tau``.

        Args:
            action: must satisfy ``self.action_space.contains``; ``1`` applies
                ``+self.force_mag`` to the cart, any other valid value applies
                ``-self.force_mag``.

        Returns:
            tuple: ``(np.array(self.state), reward, done, {})``. The reward is
            1.0 for every step up to and including the first one that reports
            ``done``; steps taken after that return 0.0 and log a warning the
            first time.
        """
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
        x, x_dot, theta, theta_dot = self.state
        force = self.force_mag if action==1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
        xacc  = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        # Explicit Euler update: positions advance with the previous velocities.
        x  = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
        self.state = (x,x_dot,theta,theta_dot)
        done = bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
        )

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # The episode was already over: warn once, then keep counting.
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
 # Apply one action, which unpacks into (inp_act, out_act, pred), bump the
 # time counter, score a write when out_act == 1, and return
 # (obs, reward, done, {}).
 # NOTE(review): this excerpt is not valid Python as shown -- there is an
 # `except IndexError:` with no `try:`, the `logger.warn(` call is never
 # closed, and `reward = 1.0` is immediately overwritten by `reward = -0.5`
 # (the comment between them suggests a lost `else:`). Recover the missing
 # lines from the original file before relying on this method.
 def step(self, action):
     assert self.action_space.contains(action)
     self.last_action = action
     inp_act, out_act, pred = action
     done = False
     reward = 0.0
     self.time += 1
     assert 0 <= self.write_head_position
     if out_act == 1:
             correct = pred == self.target[self.write_head_position]
         except IndexError:
             logger.warn("It looks like you're calling step() even though this "+
                 "environment has already returned done=True. You should always call "+
                 "reset() once you receive done=True. Any further steps are undefined "+
             correct = False
         if correct:
             reward = 1.0
             # Bail as soon as a wrong character is written to the tape
             reward = -0.5
             done = True
         self.write_head_position += 1
         if self.write_head_position >= len(self.target):
             done = True
     # Exceeding the time limit overrides the reward and ends the episode.
     if self.time > self.time_limit:
         reward = -1.0
         done = True
     obs = self._get_obs()
     self.last_reward = reward
     self.episode_total_reward += reward
     return (obs, reward, done, {})
    # Start monitoring: resolve the env id, pick the video schedule, check the
    # target directory for existing manifests, register with `monitor_closer`
    # and create the `StatsRecorder`.
    # NOTE(review): this excerpt is not valid Python as shown -- the docstring
    # opened below is never closed, the `if self.env.spec is None:` branch is
    # followed by an unconditional `env_id = self.env.spec.id` (a lost
    # `else:`?), `if force:` has no body, and the block stops at
    # `if mode is not None:`. Recover the missing lines before editing.
    def _start(self, directory, video_callable=None, force=False, resume=False,
              write_upon_reset=False, uid=None, mode=None):
        """Start monitoring.

            directory (str): A per-training run directory where to record stats.
            video_callable (Optional[function, False]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (for video_callable is None) is to take perfect cubes, capped at 1000. False disables video recording.
            force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
            resume (bool): Retain the training data already in this directory, which will be merged with our new data
            write_upon_reset (bool): Write the manifest file on each reset. (This is currently a JSON file, so writing it is somewhat expensive.)
            uid (Optional[str]): A unique id used as part of the suffix for the file. By default, uses os.getpid().
            mode (['evaluation', 'training']): Whether this is an evaluation or training episode.
        if self.env.spec is None:
            logger.warn("Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users.")
            env_id = '(unknown)'
            env_id = self.env.spec.id

        if not os.path.exists(directory):
            logger.info('Creating monitor directory %s', directory)
            if six.PY3:
                os.makedirs(directory, exist_ok=True)

        if video_callable is None:
            video_callable = capped_cubic_video_schedule
        elif video_callable == False:
            video_callable = disable_videos
        elif not callable(video_callable):
            raise error.Error('You must provide a function, None, or False for video_callable, not {}: {}'.format(type(video_callable), video_callable))
        self.video_callable = video_callable

        # Check on whether we need to clear anything
        if force:
        elif not resume:
            training_manifests = detect_training_manifests(directory)
            if len(training_manifests) > 0:
                raise error.Error('''Trying to write to monitor directory {} with existing monitor files: {}.

 You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''.format(directory, ', '.join(training_manifests[:5])))

        self._monitor_id = monitor_closer.register(self)

        self.enabled = True
        self.directory = os.path.abspath(directory)
        # We use the 'openai-gym' prefix to determine if a file is
        # ours
        self.file_prefix = FILE_PREFIX
        self.file_infix = '{}.{}'.format(self._monitor_id, uid if uid else os.getpid())

        self.stats_recorder = stats_recorder.StatsRecorder(directory, '{}.episode_batch.{}'.format(self.file_prefix, self.file_infix), autoreset=self.env_semantics_autoreset, env_id=env_id)

        if not os.path.exists(directory): os.mkdir(directory)
        self.write_upon_reset = write_upon_reset

        if mode is not None:
    # Lazily create the ImageEncoder on the first frame and record its
    # version in `self.metadata`.
    # NOTE(review): the `except error.InvalidFrame` below has no `try:` and
    # no guarded statement in this excerpt, and `self.empty = False` looks
    # like it belonged to a lost `else:`. Recover the missing lines first.
    def _encode_image_frame(self, frame):
        if not self.encoder:
            self.encoder = ImageEncoder(self.path, frame.shape, self.frames_per_sec)
            self.metadata['encoder_version'] = self.encoder.version_info

        except error.InvalidFrame as e:
            logger.warn('Tried to pass invalid video frame, marking as broken: %s', e)
            self.broken = True
            self.empty = False
def should_skip_env_spec_for_tests(spec):
    """Decide whether the test-suite should skip the env described by ``spec``.

    MuJoCo envs are skipped when neither the ``MUJOCO_KEY_BUNDLE`` environment
    variable is set nor ``~/.mujoco`` exists. Go/Hex envs and every Atari env
    whose id does not start with "Pong" or "Seaquest" are always skipped.

    Args:
        spec: environment spec; ``spec._entry_point`` (used as a string) and
            ``spec.id`` are read.

    Returns:
        bool: True if tests for this spec should be skipped, False otherwise.
    """
    # We skip tests for envs that require dependencies or are otherwise
    # troublesome to run frequently
    ep = spec._entry_point
    # Skip mujoco tests for pull request CI
    has_mujoco = bool(
        os.environ.get('MUJOCO_KEY_BUNDLE')
        or os.path.exists(os.path.expanduser('~/.mujoco'))
    )
    if not has_mujoco and ep.startswith('gym.envs.mujoco:'):
        return True

    excluded = (
        'GoEnv' in ep or
        'HexEnv' in ep or
        (ep.startswith("gym.envs.atari") and not spec.id.startswith(("Pong", "Seaquest")))
    )
    if excluded:
        logger.warn("Skipping tests for env {}".format(ep))
        return True
    return False
    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.

        This implementation seeds nothing: it only logs a warning and
        returns None.
        """
        logger.warn("Could not seed environment %s", self)
def patch_deprecated_methods(env):
    """Alias an environment's deprecated underscore methods to the current names.

    Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
    For backward compatibility, this makes it possible to work with unmodified environments.

    Args:
        env: environment exposing ``_reset``, ``_step``, ``_seed`` and
            ``_render(mode, close=...)``. It is modified in place: ``reset``,
            ``step``, ``seed``, ``render`` and ``close`` are set on it.

    The deprecation warning is logged only while the module-level flag
    ``warn_once`` is true; the flag is cleared after the first warning.
    """
    global warn_once
    if warn_once:
        logger.warn("Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior." % str(type(env)))
        warn_once = False
    env.reset = env._reset
    env.step = env._step
    env.seed = env._seed

    def render(mode):
        # New-style render never closes; closing is a separate call below.
        return env._render(mode, close=False)

    def close():
        # The old API closed the viewer through _render(close=True).
        env._render("human", close=True)

    env.render = render
    env.close = close
 def __init__(self, low=None, high=None, shape=None, dtype=np.float32):
     """Build the box from scalar or array bounds.

     Two kinds of valid input:
         Box(low=-1.0, high=1.0, shape=(3,4)) # low and high are scalars, and shape is provided
         Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0])) # low and high are arrays of the same shape

     Both bounds are stored cast to ``dtype`` as ``self.low`` / ``self.high``.
     """
     if shape is None:
         # Array form: the shape is taken from the bounds themselves.
         assert low.shape == high.shape
         shape = low.shape
     else:
         # Scalar form: broadcast each scalar bound to the requested shape.
         assert np.isscalar(low) and np.isscalar(high)
         low = low + np.zeros(shape)
         high = high + np.zeros(shape)
     self.low = low.astype(dtype)
     self.high = high.astype(dtype)
     if (self.high == 255).all() and dtype != np.uint8:
         logger.warn('Box constructor got high=255 but dtype!=uint8')
     Space.__init__(self, shape, dtype)
    # Run `self.updateRate` sub-steps, moving each agent towards its
    # destination `action[i]` and accumulating per-agent rewards, then return
    # (self.reset(), reward, done, {}).
    # NOTE(review): this excerpt is not valid Python as shown. The two string
    # literals under the first two `if`s are bare expressions (the statement
    # that used them is gone), the `np.array([...] *` expression is never
    # finished, and the `for i in xrange(self.numTargets):` and
    # `if self.viewer is not None:` bodies are empty. `xrange` is Python 2
    # only. Recover the missing lines before relying on this method.
    def step(self, action):
        if self.curr_episode > self.episodes:
                "You are calling 'step()' even though this environment has already returned done = True. You should always call 'initialize()' and 'reset()' once you receive 'done = True'"

        action = np.array(action)
        if action.shape != (self.numAgents, 2):
                "Incorrect dimenions of action. Action must have destination position for each agent"

        self.curr_episode += 1

        reward = np.zeros(self.numAgents)
        self.agentPosIncrements = np.array([(-1000.0, -1000.0)] *
        agentReachedDest = [False] * self.numAgents
        for _ in xrange(self.updateRate):
            self.curr_step += 1

            #Move targets
            for i in xrange(self.numTargets):

            #Move agent
            for i in xrange(self.numAgents):
                if not agentReachedDest[i]:
                    agentReachedDest[i] = self.moveAgent(i, action[i])
                else:  #Already reached. Removes precision errors
                    self.agentLocations[i] = action[i].astype('float32')

            #Calculate reward at this step
            reward += self.calculateAgentRewards()[0]

            if self.viewer is not None:

        return self.reset(), reward, self.curr_episode >= self.episodes, {}
    # Close the worker pipes/processes, optionally after a timeout or by
    # forced termination (`terminate=True` sets the timeout to 0).
    # NOTE(review): this excerpt is not valid Python as shown -- the parameter
    # description below has lost its docstring delimiters, the
    # `except mp.TimeoutError:` has no `try:`, the `logger.warn(` call is not
    # closed, and most loop/`if` bodies are empty. Recover the missing lines
    # from the original file before editing.
    def close_extras(self, timeout=None, terminate=False):
        timeout : int or float, optional
            Number of seconds before the call to `close` times out. If `None`,
            the call to `close` never times out. If the call to `close` times
            out, then all processes are terminated.

        terminate : bool (default: `False`)
            If `True`, then the `close` operation is forced and all processes
            are terminated.
        timeout = 0 if terminate else timeout
            if self._state != AsyncState.DEFAULT:
                logger.warn('Calling `close` while waiting for a pending '
                            'call to `{0}` to complete.'.format(
                function = getattr(self, '{0}_wait'.format(self._state.value))
        except mp.TimeoutError:
            terminate = True

        if terminate:
            for process in self.processes:
                if process.is_alive():
            for pipe in self.parent_pipes:
                if (pipe is not None) and (not pipe.closed):
                    pipe.send(('close', None))
            for pipe in self.parent_pipes:
                if (pipe is not None) and (not pipe.closed):

        for pipe in self.parent_pipes:
            if pipe is not None:
        for process in self.processes:
    # Play one move: `action` unpacks into (pos0, pos1) and is passed to
    # `self.game.action_handler`; a truthy result yields the move type and a
    # reward from `Reward.method_1`. Returns
    # (self.observation, reward, self.done, info).
    # NOTE(review): this excerpt is not valid Python as shown -- the docstring
    # delimiters are gone, the `info = {` dict and the `action_handler(` call
    # are never closed, and the strings under `if self.done:` have lost the
    # call that used them (and probably an `else:`). Recover the missing
    # lines before relying on this method.
    def step(self, action):
        implementation of the classic “agent-environment loop”.

            action (object) : the board

            observation (object):
            reward (float)
            done (boolean)
            info (dict)
        #assert self.action_space.contains(action), f"{action} ({type(action)})"

        reward = 0
        info = {
            'turn': self.game.turns_count,
            'move_type': None,
            'player': self.game.current_player,
            'player_name': ['white', 'black'][self.game.current_player]

        if self.done:
                "You are calling 'step()' even though this environment has already returned done = True."
                "You should always call 'reset()' once you receive 'done = True'"
                "-- any further steps are undefined behavior.")
            pos0, pos1 = action
            move_check = self.game.action_handler(pos0,

            if move_check:  # if the move is a valid move
                move_type, self._modifications = move_check
                reward = Reward.method_1(self.game.board, move_type)
                # for debug
                info['move_type'] = move_type

        return self.observation, reward, self.done, info
    # Convert the action into a torque (optionally with uniform noise), write
    # it into `self.engine_py._action[0]`, read back the engine state and
    # return (obs, reward, done, info) with `done = info['is_success']`.
    # NOTE(review): this excerpt is not valid Python as shown -- the
    # doxygen-style description has lost its docstring delimiters, `torque = a`
    # is immediately overwritten (a lost `else:`?), and the
    # `steps_beyond_done` bookkeeping is missing its `else:` as well. No call
    # that advances the simulation is visible between writing the action and
    # reading the state; confirm against the original file.
    def step(self, a):
        @brief      Run a simulation step for a given.

        @param[in]  a       The action to perform (in the action space rather than
                            the original torque space).

        @return     The next observation, the reward, the status of the simulation
                    (done or not), and a dictionary of extra information
        if self.continuous:
            torque = a
            torque = self.AVAIL_TORQUE[a] * self.torque_mag

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)

        # Bypass 'self.engine_py.step' method and use direct assignment to max out the performances
        self.engine_py._action[0] = torque
        self.state = self.engine_py.state

        # Get information
        info, obs = self._get_info()
        done = info['is_success']

        # Make sure the simulation is not already over
        if done:
            if self.steps_beyond_done is None:
                self.steps_beyond_done = 0
                if self.steps_beyond_done == 0:
                    logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
                self.steps_beyond_done += 1

        # Compute the reward
        reward = self.compute_reward(obs['achieved_goal'], self.goal, info)

        return obs, reward, done, info
    def step(self, action):
        """Integrate the planar vehicle dynamics over one step of length ``self.dt``.

        Args:
            action: indexable; ``action[0]`` is the thrust command and
                ``action[1]`` the angular-velocity command.

        Returns:
            tuple: ``(np.array(self.state), reward, done, {})``. While the
            episode is running the reward is minus the distance of the
            position from the origin. The first step that reports ``done``
            returns 1.0; steps taken after that return 0.0 and log a warning
            the first time.
        """
        thrust = action[0]  # Thrust command
        w = action[1]  # Angular velocity command

        state = self.state

        pos = np.array([state[0], state[1]]).flatten()
        att = np.array([state[2]]).flatten()
        vel = np.array([state[3], state[4]]).flatten()

        # Thrust acts along the body axis, i.e. the attitude rotated by 90
        # degrees; `self.g` is added as the gravity term.
        acc = thrust / self.mass * np.array(
            [cos(att + pi / 2), sin(att + pi / 2)]) + self.g
        pos = pos + vel * self.dt + 0.5 * acc * self.dt * self.dt
        vel = vel + acc * self.dt
        att = att + w * self.dt

        self.state = (pos[0], pos[1], att, vel[0], vel[1])

        pos_norm = linalg.norm(pos, 2)
        vel_norm = linalg.norm(vel, 2)
        # NOTE(review): a norm is never negative, so the `< -threshold` tests
        # can only fire for a negative threshold; kept to preserve behavior.
        done = bool(
            pos_norm < -self.pos_threshold
            or pos_norm > self.pos_threshold
            or vel_norm < -self.vel_threshold
            or vel_norm > self.vel_threshold
        )

        if not done:
            reward = -pos_norm
        elif self.steps_beyond_done is None:
            # First step that leaves the allowed region.
            # NOTE(review): +1.0 on termination looks inherited from a
            # cart-pole template -- confirm it is intended here.
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # The episode was already over: warn once, then keep counting.
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
    def step(self, action):
        """Advance the cart-pole simulation by one time step of length ``self.tau``.

        Args:
            action: must satisfy ``self.action_space.contains``; ``1`` applies
                ``+self.force_mag`` to the cart, any other valid value applies
                ``-self.force_mag``.

        Returns:
            tuple: ``(np.array(self.state), reward, done, {})``. The reward is
            1.0 for every step up to and including the first one that reports
            ``done``; steps taken after that return 0.0 and log a warning the
            first time.
        """
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        x, x_dot, theta, theta_dot = self.state
        force = self.force_mag if action == 1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot * theta_dot *
                sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length *
            (4.0 / 3.0 -
             self.masspole * costheta * costheta / self.total_mass))
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        # Explicit Euler update: positions advance with the previous velocities.
        x = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
        self.state = (x, x_dot, theta, theta_dot)
        done = bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
        )

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # The episode was already over: warn once, then keep counting.
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
    # Move the agent (grid cell value 3) on `self.state`, clipping the new
    # position to the index range 0..3, and return
    # (self.state, reward, done, {}). The goal is the cell with value 1.
    # NOTE(review): this excerpt is not valid Python as shown -- every
    # `if action==N:` branch and the `if goal_pos==list(new_agent_pos):` test
    # have empty bodies, so the per-action movement and the assignment of
    # `done` are missing, as is the `else:` in the reward section. Also note
    # `goal_pos` is a tuple compared against a list, which is never equal --
    # verify once the missing lines are recovered.
    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
        #state = np.array(self.state).reshape(4, 4)
        goal_pos = list(zip(*np.where(self.state == 1)))[0]
        agent_pos = list(zip(*np.where(self.state == 3)))[0]
        new_agent_pos = np.array(agent_pos)
        if action==0:
        elif action==1:
        elif action==2:
        elif action==3:
        new_agent_pos = np.clip(new_agent_pos, 0, 3)
        self.state[agent_pos[0], agent_pos[1]] = 0 #moved from this position so it is empty
        self.state[new_agent_pos[0], new_agent_pos[1]] = 3 #moved to this position
        #self.state = tuple(self.state.flatten())
        #check if done
        if goal_pos==list(new_agent_pos):
        #assign reward
        if not done:
            reward = 0
        elif self.steps_beyond_done is None:
            # Just arrived at the goal
            self.steps_beyond_done = 0
            reward = 1
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        return self.state, reward, done, {}
    # Write the action (or the previous one when `action is None`) into
    # `self.engine_py._action`, evaluate `_is_done()` and `_compute_reward()`,
    # and return (self.observation, reward, done, self.learning_info).
    # NOTE(review): this excerpt is not valid Python as shown -- the
    # doxygen-style description has lost its docstring delimiters, and the
    # warning text under `if self._steps_beyond_done == 0:` is a bare string
    # with no call around it (an `else:` is probably missing too). No call
    # that advances the simulation is visible; confirm against the original.
    def step(self, action):
        @brief      Run a simulation step for a given action.

        @param[in]  action   The action to perform in the action space
                             Set to None to NOT update the action.

        @return     The next observation, the reward, the status of the episode
                    (done or not), and a dictionary of extra information

        # Bypass 'self.engine_py.action' setter and use
        # direct assignment to max out the performances
        if action is None:
            action = self.action_prev
        self.engine_py._action[:] = action
        self.is_running = True
        self.action_prev = action

        # Extract information about the current simulation state
        done = self._is_done()
        self.learning_info = {'is_success': done}

        reward = self._compute_reward()

        # Make sure the simulation is not already over
        if done:
            if self._steps_beyond_done is None:
                self._steps_beyond_done = 0
                if self._steps_beyond_done == 0:
                        "You are calling 'step()' even though this environment has already \
                                 returned done = True. You should always call 'reset()' once you \
                                 receive 'done = True' -- any further steps are undefined behavior."
                self._steps_beyond_done += 1

        return self.observation, reward, done, self.learning_info
    # Update the integer `state` counter from the action and return
    # (np.array(signal), reward, done, {}), where `signal` is 1.0 exactly when
    # the state equals `self.n_press`. `done` is always False here.
    # NOTE(review): this excerpt is not valid Python as shown -- `if not done:`
    # has no body, and the nesting of the `state == 0` / `action` branches has
    # clearly lost `else:` lines (e.g. `state = 0` follows the "if not
    # pressing" comment unconditionally). The warning text is a bare string.
    # Recover the missing lines before relying on this method.
    def _step(self, action):
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        state = self.state

        reward = 0.0
        if state == 0:
            if action == 0:
                if self.np_random.rand() < self.signal_prob:
                    state = self.n_press
                reward = -1.0
            if action == 1:
                # reducing state to 0
                state -= 1
                if state == 0:
                    # this transition is rewarded
                    reward = self.reward_seq_complete
                # if not pressing, then move directly to 0
                state = 0
        signal = [float(state == self.n_press)]

        self.state = state
        done = False
        done = bool(done)

        if not done:
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            if self.steps_beyond_done == 0:
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
            self.steps_beyond_done += 1

        return np.array(signal), reward, done, {}
    # Advance `self.current_step`, fetch the next observation and reward, and
    # return (obs, reward, self.is_done, {}). The episode is flagged done once
    # `current_step + 1 == steps_per_episode`.
    # NOTE(review): this excerpt is not valid Python as shown -- the warning
    # text under `if self.is_done:` has lost the opening of the call that used
    # it (only the closing `)` remains). `action` is not used in the visible
    # lines; something may be missing before `self.current_step += 1`.
    def step(self, action):


        self.current_step += 1

        obs = self._next_observation()
        reward = self._get_reward()

        if self.is_done:
                "You are calling 'step()' even though this environment has already returned done = True. "
                "You should always call 'reset()' once you receive 'done = True' "
                "-- any further steps are undefined behavior.")
            reward = 0.0

        if self.current_step + 1 == self.steps_per_episode:
            self.is_done = True

        return obs, reward, self.is_done, {}
    # Render the environment; the visible branches distinguish
    # `mode == 'matplotlib'` with `self._render` set, `self._render` unset,
    # and (presumably) an unsupported mode.
    # NOTE(review): this excerpt is not valid Python as shown -- the docstring
    # is never closed, both branches have empty bodies, and the message
    # strings have lost the statements that used them. Recover the missing
    # lines before editing.
    def render(self, mode='matplotlib'):
      """Renders this environment in its current state.

      Note that, in order to support rendering, 
      `render=True` must be passed to the environment 
        mode(str) Rendering mode. Currently, only 
                  `"matplotlib"` is supported.
      if mode == 'matplotlib' and self._render:
      elif not self._render:
          'Need to pass `render=True` to support '
          'Invalid rendering mode "%s". '
          'Only "matplotlib" is supported.')
# Compare a freshly generated rollout hash for `spec` against the stored one
# in ROLLOUT_FILE and raise ValueError listing every mismatching field.
# NOTE(review): this excerpt is not valid Python as shown -- the
# `for error in errors:` loop has no body. The "Skipping this test" warning
# is not followed by a return, and when `spec.id` is missing from the rollout
# file execution still reaches `rollout_dict[spec.id]`, which would raise
# KeyError; early `return`s were probably lost. Confirm against the original.
def test_env_semantics(spec):
	logger.warn("Skipping this test. Existing hashes were generated in a bad way")	
	with open(ROLLOUT_FILE) as data_file:
		rollout_dict = json.load(data_file)

	if spec.id not in rollout_dict:
		if not spec.nondeterministic:
			logger.warn("Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(spec.id))

	logger.info("Testing rollout for {} environment...".format(spec.id))

	observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(spec)

	# Collect every mismatch first so one failure reports all differing fields.
	errors = []
	if rollout_dict[spec.id]['observations'] != observations_now:
		errors.append('Observations not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['observations'], observations_now))
	if rollout_dict[spec.id]['actions'] != actions_now:
		errors.append('Actions not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['actions'], actions_now))
	if rollout_dict[spec.id]['rewards'] != rewards_now:
		errors.append('Rewards not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['rewards'], rewards_now))
	if rollout_dict[spec.id]['dones'] != dones_now:
		errors.append('Dones not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['dones'], dones_now))
	if len(errors):
		for error in errors:
		raise ValueError(errors)
def _check_spec_register(spec: EnvSpec):
    """Checks whether the spec is valid to be registered. Helper function for `register`."""
    # Raises error.RegistrationError when a versioned spec is registered while
    # an unversioned spec of the same namespace/name exists, or the reverse.
    # NOTE(review): this excerpt is not valid Python as shown -- the strings
    # under `if spec.namespace is not None:` have lost the opening of the call
    # that used them, and both the `max(` and `next(` expressions are missing
    # the head of their generator and their closing arguments. Recover the
    # missing lines before editing.
    global registry, current_namespace
    if current_namespace is not None:
        if spec.namespace is not None:
                f"Custom namespace `{spec.namespace}` is being overridden "
                f"by namespace `{current_namespace}`. If you are developing a "
                "plugin you shouldn't specify a namespace in `register` "
                "calls. The namespace is specified through the "
                "entry point package metadata.")

    latest_versioned_spec = max(
         for spec_ in registry.values() if spec_.namespace == spec.namespace
         and spec_.name == spec.name and spec_.version is not None),
        key=lambda spec_: int(spec_.version),  # type: ignore

    unversioned_spec = next(
         for spec_ in registry.values() if spec_.namespace == spec.namespace
         and spec_.name == spec.name and spec_.version is None),

    if unversioned_spec is not None and spec.version is not None:
        raise error.RegistrationError(
            "Can't register the versioned environment "
            f"`{spec.id}` when the unversioned environment "
            f"`{unversioned_spec.id}` of the same name already exists.")
    elif latest_versioned_spec is not None and spec.version is None:
        raise error.RegistrationError(
            "Can't register the unversioned environment "
            f"`{spec.id}` when the versioned environment "
            f"`{latest_versioned_spec.id}` of the same name "
            f"already exists. Note: the default behavior is "
            f"that `gym.make` with the unversioned environment "
            f"will return the latest versioned environment")
def _check_nan(env: gym.Env, check_inf: bool = True) -> None:
    """Check for NaN and Inf.

    Takes 10 random actions in ``env`` and logs a warning for each NaN (and,
    when ``check_inf`` is true, each infinite) observation or reward.

    Args:
        env: environment whose ``step`` returns a 4-tuple
            ``(observation, reward, done, info)``.
        check_inf: also warn about infinite values.
    """
    for _ in range(10):
        action = env.action_space.sample()
        observation, reward, done, _ = env.step(action)

        if np.any(np.isnan(observation)):
            logger.warn("Encountered NaN value in observations.")
        if np.any(np.isnan(reward)):
            logger.warn("Encountered NaN value in rewards.")
        if check_inf and np.any(np.isinf(observation)):
            logger.warn("Encountered inf value in observations.")
        if check_inf and np.any(np.isinf(reward)):
            logger.warn("Encountered inf value in rewards.")

        # Stepping a finished episode is undefined behavior, so start a new
        # one before sampling the next action.
        if done:
            env.reset()
# Iterate over the entry points registered under `entry_point`, work out each
# plugin's module/attr, choose a namespace context from the plugin name, and
# load the plugin inside that context.
# NOTE(review): this excerpt is not valid Python as shown -- the
# `except AttributeError:` and `except Exception as e:` clauses have no
# `try:`, several `else:`/`finally:` lines appear to be missing (e.g.
# `module, attr = None, None` unconditionally follows the split), the
# `raise error.Error(` call is not closed, and the block stops at the final
# `except`. Recover the missing lines before editing.
def load_env_plugins(entry_point: str = "gym.envs") -> None:
    # Load third-party environments
    for plugin in metadata.entry_points(group=entry_point):
        # Python 3.8 doesn't support plugin.module, plugin.attr
        # So we'll have to try and parse this ourselves
            module, attr = plugin.module, plugin.attr  # type: ignore  ## error: Cannot access member "attr" for type "EntryPoint"
        except AttributeError:
            if ":" in plugin.value:
                module, attr = plugin.value.split(":", maxsplit=1)
                module, attr = plugin.value, None
            module, attr = None, None
            if attr is None:
                raise error.Error(
                    f"Gym environment plugin `{module}` must specify a function to execute, not a root module"

        context = namespace(plugin.name)
        if plugin.name.startswith("__") and plugin.name.endswith("__"):
            # `__internal__` is an artifact of the plugin system when
            # the root namespace had an allow-list. The allow-list is now
            # removed and plugins can register environments in the root
            # namespace with the `__root__` magic key.
            if plugin.name == "__root__" or plugin.name == "__internal__":
                context = contextlib.nullcontext()
                    f"The environment namespace magic key `{plugin.name}` is unsupported. "
                    "To register an environment at the root namespace you should specify "
                    "the `__root__` namespace.")

        with context:
            fn = plugin.load()
            except Exception as e:
    def step(self, action):
        """Spawn a prey if needed, score the snake's head position and report termination.

        Args:
            action: must satisfy ``self.action_space.contains``.
                NOTE(review): no statement in this method moves the snake
                with ``action`` -- confirm that happens elsewhere.

        Returns:
            tuple: ``(self.state, reward, done, {})``. The reward is 500.0 when
            the head is on the prey and -0.5 otherwise; the first step that
            reports ``done`` overrides it with -1000. Steps taken after that
            keep the 500/-0.5 reward and log a warning the first time.
        """
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))

        # Spawn a new prey whenever there is none on the board.
        if not self.prey.tolist():
            # random.randint needs integer bounds; the half-extents are
            # integral for even world sizes.
            x_lo, x_hi = int(-.5*self.world_width), int(.5*self.world_width - 1)
            y_lo, y_hi = int(-.5*self.world_height), int(.5*self.world_height - 1)
            occupied = self.snake.blocks.tolist()
            while True:
                px = float(random.randint(x_lo, x_hi))
                py = float(random.randint(y_lo, y_hi))
                # Resample until the prey does not land on the snake.
                if [px, py] not in occupied:
                    break

            self.prey = np.array([px, py])
            logger.info("[INFO] -- New Prey at {}, {} ".format(px,py))

        if self.snake.blocks[0].tolist() == self.prey.tolist():
            # Head is on the prey: snapshot the state, then consume the prey.
            self.state = np.array([self.get_state()])
            self.prey = np.array([])
            logger.info("[INFO] -- Manger")
            reward = 500.
        else:
            reward = -.5
            self.state = np.array([self.get_state()])
        done = self.snake.is_dead or self.oob(*self.snake.blocks[0])

        if done:
            if self.steps_beyond_done is None:
                self.steps_beyond_done = 0
                reward = -1000
            else:
                # The episode was already over: warn once, then keep counting.
                if self.steps_beyond_done == 0:
                    logger.warn("You are calling 'step()' but it's already done !")
                self.steps_beyond_done += 1
        return self.state, reward, done, {}
    def step(self, action):
        """Apply the intervention in ``action[0]`` and advance the two-digit state.

        Args:
            action: must satisfy ``self.action_space.contains``; a non-zero
                ``action[0]`` overwrites the first state digit before the
                transition is applied.

        Returns:
            tuple: ``(observation, reward, done, info)`` where ``info`` holds
            ``done`` and the current ``steps_beyond_done`` counter.
        """
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        state = self.state

        # TODO: rename...It's not really delta.
        action_0 = action[0]
        next_state = np.copy(state)
        if action_0 != 0:
            next_state[0] = action_0
            # The last action_input is do nothing.

        # After intervention, the state evolves into the next following the transition prob.
        next_state[0] = (next_state[0] + 1) % 10
        # next_second_digit = next_first_digit % 2
        next_state[1] = (next_state[1] + 1) % 2
        assert self._observed_state_space.contains(
            next_state), 'internal error. Illegal next state'

        self.state = next_state
        self.step_count += 1
        if self.step_count >= self.max_num_steps:
            if self.steps_beyond_done is None:
                # First step at or past the step limit.
                self.steps_beyond_done = 0
            else:
                # Stepping past the limit again: warn once, then keep counting.
                if self.steps_beyond_done == 0:
                    logger.warn(
                        'You are calling \'step()\' even though this environment has already returned done = True. You should '
                        'always call \'reset()\' once you receive \'done = True\' -- any further steps are undefined behavior.'
                    )
                self.steps_beyond_done += 1
        done = self._get_is_done()

        info = {'done': done, 'steps_beyond_done': self.steps_beyond_done}
        reward = self.compute_reward(self._get_achieved_goal(),
                                     self._get_desired_goal(), done)

        return self._get_observation(), reward, done, info
    def step(self, action):
        """Advance the cart-pole state by one step of length ``self.tau``.

        Args:
            action: force command; clipped to ``[-self.force_mag, self.force_mag]``.

        Returns:
            tuple: ``(self.state, reward, done, {})`` with the state stored as
            ``np.array([x, th, x_dot, th_dot])``. The reward is 1.0 for every
            step up to and including the first one that reports ``done``;
            steps taken after that return 0.0 and log a warning the first time.
        """
        #TODO: assert action is a scalar
        # assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))

        # get state
        x, th, x_dot, th_dot = self.state
        # Result is unused below; the call is kept so behavior is unchanged.
        theta = self._unwrap_angle(th)

        # clip torque, update dynamics
        u = np.clip(action, -self.force_mag, self.force_mag)
        acc = self._accels(anp.array([x, th, x_dot, th_dot, u]))

        # integrate (velocities are updated first and the new values are
        # used to advance the positions)
        xacc, thacc = acc[0], acc[1]
        x_dot = x_dot + self.tau * xacc
        x = x + self.tau * x_dot
        th_dot = th_dot + self.tau * thacc
        th = th + self.tau * th_dot + 0.5 * self.tau**2 * thacc

        # update state
        self.state = np.array([x, th, x_dot, th_dot])

        done = self.is_done()

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # The episode was already over: warn once, then keep counting.
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return self.state, reward, done, {}
def test_env_semantics(spec):
    """Compare a fresh rollout of ``spec`` with the hashes stored in ``ROLLOUT_FILE``.

    The stored entry for ``spec.id`` is compared field by field
    (observations, actions, rewards, dones) with the values returned by
    ``generate_rollout_hash(spec)``.

    Args:
        spec: Environment spec; ``spec.id`` and ``spec.nondeterministic`` are read.

    Raises:
        ValueError: If any stored field differs from the fresh rollout. The
            exception carries the list of mismatch messages.
    """
    # NOTE(review): this message announces a skip, yet the comparison below
    # still runs -- confirm whether an early return was intended here.
    logger.warn("Skipping this test. Existing hashes were generated in a bad way")
    with open(ROLLOUT_FILE) as data_file:
        rollout_dict = json.load(data_file)

    if spec.id not in rollout_dict:
        if not spec.nondeterministic:
            logger.warn("Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(spec.id))
        # Nothing to compare against; indexing rollout_dict below would raise KeyError.
        return

    logger.info("Testing rollout for {} environment...".format(spec.id))

    observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(spec)

    expected = rollout_dict[spec.id]
    current = (
        ('observations', observations_now),
        ('actions', actions_now),
        ('rewards', rewards_now),
        ('dones', dones_now),
    )
    errors = [
        '{} not equal for {} -- expected {} but got {}'.format(
            key.capitalize(), spec.id, expected[key], value_now)
        for key, value_now in current
        if expected[key] != value_now
    ]
    if errors:
        # Report every mismatch before failing so none is hidden by the raise.
        for error in errors:
            logger.warn(error)
        raise ValueError(errors)
    def _step(self, action):
        """Apply a poke (0) or press (1) action and return the transition.

        ``self.state`` counts the presses still required. A press decrements
        it (never below zero). A poke while the counter is at or below zero
        pays ``self.reward_seq_complete`` and resets the counter to
        ``self.n_press``.

        Args:
            action: ``0`` for poke, ``1`` for press; must be accepted by
                ``self.action_space``.

        Returns:
            ``(obs, reward, done, info)``. ``obs`` is ``np.array([state])``
            when ``self.observe_state`` is truthy, otherwise the constant
            ``np.array([1.])``. ``done`` is always ``False`` and ``info`` is
            an empty dict.

        Raises:
            ValueError: If ``action`` is neither 0 nor 1.
        """
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        state = self.state

        reward = 0.0
        if action == 0:
            # poke
            if state <= 0:
                # reward and reset state
                reward = self.reward_seq_complete
                state = self.n_press
        elif action == 1:
            # press
            state = max(0, state - 1)
        else:
            raise ValueError("unsupported action: %r" % (action,))

        self.state = state
        done = False

        # `done` is constant above, so this bookkeeping is currently
        # unreachable; it is kept so the usual post-termination warning is
        # already in place should termination ever be introduced.
        if done:
            if self.steps_beyond_done is None:
                self.steps_beyond_done = 0
            else:
                if self.steps_beyond_done == 0:
                    logger.warn(
                        "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
                self.steps_beyond_done += 1

        if self.observe_state:
            obs = np.array([self.state])
        else:
            obs = np.array([1.])

        return obs, reward, done, {}
    def capture_frame(self):
        """Render the given `env` and add the resulting frame to the video."""
        # Nothing to capture when the recorder is not functional.
        if not self.functional: return
        logger.debug('Capturing video frame: path=%s', self.path)

        # Ask for text output in ansi mode, an RGB array otherwise.
        render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
        frame = self.env.render(mode=render_mode)

        # NOTE(review): the branching below looks truncated. As written, the
        # warning and `broken` flag are only set when `_async` is truthy, and
        # `last_frame` is assigned inside the `frame is None` branch -- confirm
        # against the intended control flow (missing `return`/`else`?).
        if frame is None:
            if self._async:
                # Indicates a bug in the environment: don't want to raise
                # an error here.
                logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
                self.broken = True
            self.last_frame = frame
            # NOTE(review): this `if` has no body in the visible source; the
            # frame-encoding calls appear to be missing.
            if self.ansi_mode:
    def step(self, action):
        """Advance the pole-balancing dynamics by one time step of ``self.tau``.

        The applied force is ``self.force_mag * action``. Integration is
        explicit Euler when ``self.kinematics_integrator == 'euler'`` and
        semi-implicit Euler (velocities first) otherwise.

        Args:
            action: Multiplier applied to ``self.force_mag``.

        Returns:
            ``(observation, reward, done, info)``. ``observation`` is
            ``np.array(self.state)``; ``done`` is ``True`` once ``x`` or
            ``theta`` leaves its threshold band; ``reward`` is 1.0 up to and
            including the terminating step and 0.0 on any later step;
            ``info`` is an empty dict.
        """
        x, x_dot, theta, theta_dot = self.state
        force = self.force_mag * action
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        if self.kinematics_integrator == 'euler':
            x = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else:  # semi-implicit euler
            x_dot = x_dot + self.tau * xacc
            x = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot
        self.state = (x, x_dot, theta, theta_dot)
        done = bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
        )

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell! The terminating step is still rewarded.
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # Stepping after termination: warn once, then just count.
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
 def __init__(self, low=None, high=None, shape=None, dtype=None):
     """
     Two kinds of valid input:
         Box(low=-1.0, high=1.0, shape=(3,4)) # low and high are scalars, and shape is provided
         Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0])) # low and high are arrays of the same shape

     When ``dtype`` is omitted it is autodetected (``np.uint8`` if every
     upper bound equals 255, ``np.float32`` otherwise) and a warning asks
     the caller to pass it explicitly.
     """
     if shape is None:
         # Array form: the shape is taken from the bounds themselves.
         assert low.shape == high.shape
         shape = low.shape
     else:
         # Scalar form: broadcast both bounds to the requested shape.
         assert np.isscalar(low) and np.isscalar(high)
         low = low + np.zeros(shape)
         high = high + np.zeros(shape)
     if dtype is None:  # Autodetect type
         if (high == 255).all():
             dtype = np.uint8
         else:
             dtype = np.float32
         logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype)
     self.low = low.astype(dtype)
     self.high = high.astype(dtype)
     gym.Space.__init__(self, shape, dtype)
    def step(self, action):
        """Advance the pole dynamics one step and reward closeness to a target angle.

        Action ``1`` applies ``+self.force_mag``; any other accepted action
        applies ``-self.force_mag``. Integration is explicit Euler when
        ``self.kinematics_integrator == 'euler'`` and semi-implicit Euler
        otherwise. The episode ends only when ``x`` leaves
        ``[-self.x_threshold, self.x_threshold]``.

        Args:
            action: Discrete action accepted by ``self.action_space``.

        Returns:
            ``(observation, reward, done, info)``. Up to and including the
            terminating step the reward is ``exp(-d**2 / 2)`` with
            ``d = (theta - self.desired_angle) / self.flexibility``; on any
            later step it is 0.0. ``info`` is an empty dict.
        """
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
        x, x_dot, theta, theta_dot = self.state
        force = self.force_mag if action == 1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        if self.kinematics_integrator == 'euler':
            x = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else:  # semi-implicit euler
            x_dot = x_dot + self.tau * xacc
            x = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot
        self.state = (x, x_dot, theta, theta_dot)
        done = bool(x < -self.x_threshold or x > self.x_threshold)

        distance_from_desired_angle = (theta - self.desired_angle) / self.flexibility
        # Bell-shaped reward: 1.0 at the desired angle, decaying with distance.
        angle_reward = math.exp(-distance_from_desired_angle * distance_from_desired_angle / 2)
        if not done:
            reward = angle_reward
        elif self.steps_beyond_done is None:
            # Pole just fell! The terminating step is still rewarded.
            self.steps_beyond_done = 0
            reward = angle_reward
        else:
            # Stepping after termination: warn once, then just count.
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
 def __init__(self, low=None, high=None, shape=None, dtype=None):
     """
     Two kinds of valid input:
         Box(low=-1.0, high=1.0, shape=(3,4)) # low and high are scalars, and shape is provided
         Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0])) # low and high are arrays of the same shape

     When ``dtype`` is omitted it is autodetected (``np.uint8`` if every
     upper bound equals 255, ``np.float32`` otherwise) and a warning asks
     the caller to pass it explicitly.
     """
     if shape is None:
         # Array form: the shape is taken from the bounds themselves.
         assert low.shape == high.shape
         shape = low.shape
     else:
         # Scalar form: broadcast both bounds to the requested shape.
         assert np.isscalar(low) and np.isscalar(high)
         low = low + np.zeros(shape)
         high = high + np.zeros(shape)
     if dtype is None:  # Autodetect type
         if (high == 255).all():
             dtype = np.uint8
         else:
             dtype = np.float32
         logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype)
     self.low = low.astype(dtype)
     self.high = high.astype(dtype)
     gym.Space.__init__(self, shape, dtype)
    def step(self, action):
        """Set the traffic-light phase and return the resulting transition.

        The phase ``action`` is applied to ``self.intersection_id`` through
        ``self.cityflow.set_tl_phase``; state and reward are then read from
        ``self._get_state()`` and ``self._get_reward()``.

        Args:
            action: Phase index accepted by ``self.action_space``.

        Returns:
            ``(state, reward, done, info)``. ``done`` mirrors ``self.is_done``,
            which is set once ``self.current_step + 1`` reaches
            ``self.steps_per_episode``. When called after the episode has
            already finished, a warning is logged and the reward is 0.0.
            ``info`` is an empty dict.
        """
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        self.cityflow.set_tl_phase(self.intersection_id, action)

        state = self._get_state()
        reward = self._get_reward()

        self.current_step += 1

        if self.is_done:
            # The episode had already ended before this call.
            logger.warn(
                "You are calling 'step()' even though this environment has already returned done = True. "
                "You should always call 'reset()' once you receive 'done = True' "
                "-- any further steps are undefined behavior.")
            reward = 0.0

        if self.current_step + 1 == self.steps_per_episode:
            self.is_done = True

        return state, reward, self.is_done, {}
def _check_render(
    env: gym.Env, warn: bool = True, headless: bool = False
) -> None:  # pragma: no cover
    # NOTE(review): the following lines read like a docstring whose triple
    # quotes are missing in the visible source -- confirm and restore.
    Check the declared render modes/fps and the `render()`/`close()`
    method of the environment.
    :param env: The environment to check
    :param warn: Whether to output additional warnings
    :param headless: Whether to disable render modes
        that require a graphical interface. False by default.
    # Declared render modes are read from the environment metadata.
    render_modes = env.metadata.get("render_modes")
    if render_modes is None:
        if warn:
                # NOTE(review): bare message strings -- the call that should
                # emit them appears to be missing.
                "No render modes was declared in the environment "
                " (env.metadata['render_modes'] is None or not defined), "
                "you may have trouble when calling `.render()`"

    render_fps = env.metadata.get("render_fps")
    # We only require `render_fps` if rendering is actually implemented
    if render_fps is None and render_modes is not None and len(render_modes) > 0:
        if warn:
                # NOTE(review): same as above -- the emitting call is missing.
                "No render fps was declared in the environment "
                " (env.metadata['render_fps'] is None or not defined), "
                "rendering may occur at inconsistent fps"

        # NOTE(review): the `if` and `for` below have no bodies in the visible
        # source, and their nesting under the fps check looks unintended.
        # Don't check render mode that require a
        # graphical interface (useful for CI)
        if headless and "human" in render_modes:
        # Check all declared render modes
        for render_mode in render_modes:
    def capture_frame(self):
        """Render the given `env` and add the resulting frame to the video."""
        # Nothing to capture when the recorder is not functional.
        if not self.functional: return
        logger.debug('Capturing video frame: path=%s', self.path)

        # Ask for text output in ansi mode, an RGB array otherwise.
        render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
        frame = self.env.render(mode=render_mode)

        # NOTE(review): the branching below looks truncated. As written, the
        # warning and `broken` flag are only set when `_async` is truthy, and
        # `last_frame` is assigned inside the `frame is None` branch -- confirm
        # against the intended control flow (missing `return`/`else`?).
        if frame is None:
            if self._async:
                # Indicates a bug in the environment: don't want to raise
                # an error here.
                logger.warn('Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s', self.path, self.metadata_path)
                self.broken = True
            self.last_frame = frame
            # NOTE(review): this `if` has no body in the visible source; the
            # frame-encoding calls appear to be missing.
            if self.ansi_mode:
    def step(self, action):
        """Score ``action`` against the current state and track termination.

        The reward comes from ``self.reward(self.state, action)``. Every
        non-zero action increments ``self.steps_without_correct_action``; the
        episode is done once that counter reaches 20.

        Args:
            action: Scalar action; must lie strictly between -10 and 10.

        Returns:
            ``(observation, reward, done, info)`` with
            ``observation = np.array(self.state)`` and an empty ``info`` dict.

        Raises:
            ValueError: If ``action <= -10.0`` or ``action >= 10.0``.
        """
        # The bounds were previously joined with `and`, which can never hold,
        # so out-of-range actions slipped through unchecked.
        # NOTE(review): the bounds are exclusive as written (`<=` / `>=`);
        # confirm that exactly +/-10 is meant to be rejected.
        if action <= -10.0 or action >= 10.0:
            raise ValueError("action %r is outside the open interval (-10, 10)" % (action,))

        reward = self.reward(self.state, action)

        if action != 0.0:
            self.steps_without_correct_action += 1.0

        done = self.steps_without_correct_action >= 20

        if done:
            if self.steps_beyond_done is None:
                # First terminal step: start counting from zero so that the
                # next call (the first one *after* done) triggers the warning.
                self.steps_beyond_done = 0
            else:
                if self.steps_beyond_done == 0:
                    logger.warn(
                        "You are calling 'step()' even though this "
                        "environment has already returned done = True. You "
                        "should always call 'reset()' once you receive 'done = "
                        "True' -- any further steps are undefined behavior.")
                self.steps_beyond_done += 1

        return np.array(self.state), reward, done, {}
    def step(self, action):
        """Turn and move the agent, then report whether the goal was reached.

        ``action`` is clipped to the bounds of ``self.action_space``.
        ``action[0]`` is the distance to travel and ``action[1]`` the change
        in heading. The move is discarded when
        ``self.is_colliding(x, y, 1)`` reports a collision at the new
        position.

        Args:
            action: Sequence ``(ds, dtheta)``.

        Returns:
            ``(observation, reward, done, info)``. ``observation`` comes from
            ``self.normalised_state()``; ``done`` is
            ``self.is_colliding(x, y, 'r')`` for the resulting position;
            ``reward`` is -0.1 per step plus 100.0 on the first step that
            reaches the goal; ``info`` is an empty dict.
        """
        action = np.clip(action, self.action_space.low, self.action_space.high)

        ds = action[0]
        dtheta = action[1]

        x, y, theta = self.state

        # update theta and keep normalised to [0, 2pi] range
        theta = (theta + dtheta) % (2 * math.pi)
        # update position
        x = x + math.cos(theta) * ds
        y = y + math.sin(theta) * ds

        # Only commit the move when the new position is free.
        wall_collision = self.is_colliding(x, y, 1)
        if not wall_collision:
            self.state[0] = x
            self.state[1] = y
            self.state[2] = theta

        done = self.is_colliding(self.state[0], self.state[1], 'r')

        reward = -0.1

        if done and self.steps_beyond_done is None:
            # solved the maze!
            reward += 100.0
            self.steps_beyond_done = 0
        elif self.steps_beyond_done is not None:
            # Stepping after the goal was reached: warn once, then just count.
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1

        return self.normalised_state(), reward, done, {}
    def step(self, action):
        # Applies `action` through `self.coordinator` while the episode is
        # running; after termination it returns the last grid/context with
        # zero reward.
        # NOTE(review): several lines appear to be missing from this method in
        # the visible source (rest of the coordinator call, the termination
        # update, the `else:` branch and the warning call) -- restore them
        # from the original before relying on this code.

        if not self.done:

            # MDP Transition
            new_grid, new_context = self.coordinator(self.grid, action,

            # New State
            self.grid = new_grid
            self.context = new_context

            # Termination as a function of New State

            # API Formatting
            # Necessary condition for MDP, its New State is public
            obs = new_grid, new_context
            # Reward as a function of New State
            reward = self._award()
            info = self._report()

            return obs, reward, self.done, info


            if self.steps_beyond_done == 0:

                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior.")

            self.steps_beyond_done += 1

            # Graceful after termination
            return (self.grid, self.context), 0.0, True, {}
    def __init__(
        # NOTE(review): the signature looks truncated in the visible source --
        # `self`, the `env` used below and the closing `):` are missing.
        video_folder: str,
        episode_trigger: Callable[[int], bool] = None,
        step_trigger: Callable[[int], bool] = None,
        video_length: int = 0,
        name_prefix: str = "rl-video",

        # With no trigger given, fall back to the capped cubic schedule.
        if episode_trigger is None and step_trigger is None:
            episode_trigger = capped_cubic_video_schedule

        # Exactly one of the two triggers may be set.
        trigger_count = sum(x is not None
                            for x in [episode_trigger, step_trigger])
        assert trigger_count == 1, "Must specify exactly one trigger"

        self.episode_trigger = episode_trigger
        self.step_trigger = step_trigger
        self.video_recorder = None

        self.video_folder = os.path.abspath(video_folder)
        # Create output folder if needed
        if os.path.isdir(self.video_folder):
                # NOTE(review): bare message string -- the call that should
                # emit this overwrite warning appears to be missing.
                f"Overwriting existing videos at {self.video_folder} folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)"
        os.makedirs(self.video_folder, exist_ok=True)

        self.name_prefix = name_prefix
        self.step_id = 0
        self.video_length = video_length

        # Recording bookkeeping, all starting from an idle state.
        self.recording = False
        self.recorded_frames = 0
        self.is_vector_env = getattr(env, "is_vector_env", False)
        self.episode_id = 0
    Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
    For backward compatibility, this makes it possible to work with unmodified environments.
    global warn_once
    if warn_once:
            "Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior."
            % str(type(env)))
        warn_once = False
    env.reset = env._reset
    env.step = env._step
    env.seed = env._seed

    def render(mode):
        return env._render(mode, close=False)

    def close():
        env._render("human", close=True)

    env.render = render
    env.close = close
def deprecated_warn_once(text):
    """Log ``text`` as a warning, but only on the first call.

    The module-level ``warn_once`` flag is cleared after the first warning so
    that later calls return without logging anything.

    Args:
        text: Warning message to emit.
    """
    global warn_once
    if not warn_once:
        return
    warn_once = False
    # Previously the flag was cleared but the message itself was never emitted.
    logger.warn(text)
import gym
import pygame
import matplotlib
import argparse
from gym import logger
# Plotting is optional: fall back to `plt = None` when pyplot cannot be
# imported so that the rest of the module keeps working.
# NOTE(review): the warning text mentions setting a backend, but no
# `matplotlib.use(...)` call is visible here -- confirm whether one belongs
# inside this `try`.
try:
    import matplotlib.pyplot as plt
except ImportError as e:
    logger.warn('failed to set matplotlib backend, plotting will not work: %s' % str(e))
    plt = None

from collections import deque
from pygame.locals import VIDEORESIZE

def display_arr(screen, arr, video_size, transpose):
    """Rescale ``arr`` to the 0-255 range and draw it on ``screen``.

    Args:
        screen: pygame surface to blit onto.
        arr: Array-like frame exposing ``min()``, ``max()`` and ``swapaxes()``.
        video_size: Target ``(width, height)`` passed to ``pygame.transform.scale``.
        transpose: When truthy, swap the first two axes before building the surface.
    """
    arr_min, arr_max = arr.min(), arr.max()
    value_range = arr_max - arr_min
    if value_range:
        arr = 255.0 * (arr - arr_min) / value_range
    else:
        # A constant frame has zero range; dividing by it would fill the
        # image with NaNs, so show it as all zeros instead.
        arr = 0.0 * (arr - arr_min)
    pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr)
    pyg_img = pygame.transform.scale(pyg_img, video_size)
    screen.blit(pyg_img, (0, 0))

def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None):
    """Allows one to play the game using keyboard.

    To simply play the game use:


    Above code works also if env is wrapped, so it's particularly useful in
    verifying that the frame-level preprocessing does not render the game