Beispiel #1
0
 def step(self, action):
     assert self.action_space.contains(action)
     self.last_action = action
     inp_act, out_act, pred = action
     done = False
     reward = 0.0
     self.time += 1
     assert 0 <= self.write_head_position
     if out_act == 1:
         try:
             correct = pred == self.target[self.write_head_position]
         except IndexError:
             logger.warn("It looks like you're calling step() even though this "+
                 "environment has already returned done=True. You should always call "+
                 "reset() once you receive done=True. Any further steps are undefined "+
                 "behaviour.")
             correct = False
         if correct:
             reward = 1.0
         else:
             # Bail as soon as a wrong character is written to the tape
             reward = -0.5
             done = True
         self.write_head_position += 1
         if self.write_head_position >= len(self.target):
             done = True
     self._move(inp_act)
     if self.time > self.time_limit:
         reward = -1.0
         done = True
     obs = self._get_obs()
     self.last_reward = reward
     self.episode_total_reward += reward
     return (obs, reward, done, {})
Beispiel #2
0
    def _encode_image_frame(self, frame):
        if not self.encoder:
            self.encoder = ImageEncoder(self.path, frame.shape,
                                        self.frames_per_sec)
            self.metadata['encoder_version'] = self.encoder.version_info

        try:
            self.encoder.capture_frame(frame)
        except error.InvalidFrame as e:
            logger.warn(
                'Tried to pass invalid video frame, marking as broken: %s', e)
            self.broken = True
        else:
            self.empty = False
Beispiel #3
0
def should_skip_env_spec_for_tests(spec):
    # We skip tests for envs that require dependencies or are otherwise
    # troublesome to run frequently
    ep = spec._entry_point
    # Skip mujoco tests for pull request CI
    # skip_mujoco = not (os.environ.get('MUJOCO_KEY'))
    # if skip_mujoco and (ep.startswith('scigym.envs.mujoco:') or ep.startswith('scigym.envs.robotics:')):
    #     return True
    if ('GoEnv' in ep or 'HexEnv' in ep  # or
            # (ep.startswith("scigym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest"))
        ):
        logger.warn("Skipping tests for env {}".format(ep))
        return True
    return False
Beispiel #4
0
    def step(self, action):
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))
        state = self.state
        x, x_dot, theta, theta_dot = state
        force = self.force_mag if action == 1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot * theta_dot *
                sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length *
            (4.0 / 3.0 -
             self.masspole * costheta * costheta / self.total_mass))
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        if self.kinematics_integrator == 'euler':
            x = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else:  # semi-implicit euler
            x_dot = x_dot + self.tau * xacc
            x = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot
        self.state = (x, x_dot, theta, theta_dot)
        done =  x < -self.x_threshold \
                or x > self.x_threshold \
                or theta < -self.theta_threshold_radians \
                or theta > self.theta_threshold_radians
        done = bool(done)

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}
Beispiel #5
0
    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        logger.warn("Could not seed environment %s", self)
        return
Beispiel #6
0
def update_rollout_dict(spec, rollout_dict):
    """
    Takes as input the environment spec for which the rollout is to be generated,
    and the existing dictionary of rollouts. Returns True iff the dictionary was
    modified.
    """
    # Skip platform-dependent
    if should_skip_env_spec_for_tests(spec):
        logger.info("Skipping tests for {}".format(spec.id))
        return False

    # Skip environments that are nondeterministic
    if spec.nondeterministic:
        logger.info("Skipping tests for nondeterministic env {}".format(spec.id))
        return False

    logger.info("Generating rollout for {}".format(spec.id))

    try:
        observations_hash, actions_hash, rewards_hash, dones_hash = generate_rollout_hash(spec)
    except:
        # If running the env generates an exception, don't write to the rollout file
        logger.warn("Exception {} thrown while generating rollout for {}. Rollout not added.".format(sys.exc_info()[0], spec.id))
        return False

    rollout = {}
    rollout['observations'] = observations_hash
    rollout['actions'] = actions_hash
    rollout['rewards'] = rewards_hash
    rollout['dones'] = dones_hash

    existing = rollout_dict.get(spec.id)
    if existing:
        differs = False
        for key, new_hash in rollout.items():
            differs = differs or existing[key] != new_hash
        if not differs:
            logger.debug("Hashes match with existing for {}".format(spec.id))
            return False
        else:
            logger.warn("Got new hash for {}. Overwriting.".format(spec.id))

    rollout_dict[spec.id] = rollout
    return True
Beispiel #7
0
def patch_deprecated_methods(env):
    """
    Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
    For backward compatibility, this makes it possible to work with unmodified environments.
    """
    global warn_once
    if warn_once:
        logger.warn(
            "Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior."
            % str(type(env)))
        warn_once = False
    env.reset = env._reset
    env.step = env._step
    env.seed = env._seed

    def render(mode):
        return env._render(mode, close=False)

    def close():
        env._render("human", close=True)

    env.render = render
    env.close = close
Beispiel #8
0
    def capture_frame(self):
        """Render the given `env` and add the resulting frame to the video."""
        if not self.functional: return
        logger.debug('Capturing video frame: path=%s', self.path)

        render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
        frame = self.env.render(mode=render_mode)

        if frame is None:
            if self._async:
                return
            else:
                # Indicates a bug in the environment: don't want to raise
                # an error here.
                logger.warn(
                    'Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s',
                    self.path, self.metadata_path)
                self.broken = True
        else:
            self.last_frame = frame
            if self.ansi_mode:
                self._encode_ansi_frame(frame)
            else:
                self._encode_image_frame(frame)
Beispiel #9
0
 def __init__(self, low=None, high=None, shape=None, dtype=None):
     """
     Two kinds of valid input:
         Box(low=-1.0, high=1.0, shape=(3,4)) # low and high are scalars, and shape is provided
         Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0])) # low and high are arrays of the same shape
     """
     if shape is None:
         assert low.shape == high.shape
         shape = low.shape
     else:
         assert np.isscalar(low) and np.isscalar(high)
         low = low + np.zeros(shape)
         high = high + np.zeros(shape)
     if dtype is None:  # Autodetect type
         if (high == 255).all():
             dtype = np.uint8
         else:
             dtype = np.float32
         logger.warn(
             "scigym.spaces.Box autodetected dtype as {}. Please provide explicit dtype."
             .format(dtype))
     self.low = low.astype(dtype)
     self.high = high.astype(dtype)
     scigym.Space.__init__(self, shape, dtype)
Beispiel #10
0
def test_env_semantics(spec):
    logger.warn(
        "Skipping this test. Existing hashes were generated in a bad way")
    return
    with open(ROLLOUT_FILE) as data_file:
        rollout_dict = json.load(data_file)

    if spec.id not in rollout_dict:
        if not spec.nondeterministic:
            logger.warn(
                "Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs"
                .format(spec.id))
        return

    logger.info("Testing rollout for {} environment...".format(spec.id))

    observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(
        spec)

    errors = []
    if rollout_dict[spec.id]['observations'] != observations_now:
        errors.append(
            'Observations not equal for {} -- expected {} but got {}'.format(
                spec.id, rollout_dict[spec.id]['observations'],
                observations_now))
    if rollout_dict[spec.id]['actions'] != actions_now:
        errors.append(
            'Actions not equal for {} -- expected {} but got {}'.format(
                spec.id, rollout_dict[spec.id]['actions'], actions_now))
    if rollout_dict[spec.id]['rewards'] != rewards_now:
        errors.append(
            'Rewards not equal for {} -- expected {} but got {}'.format(
                spec.id, rollout_dict[spec.id]['rewards'], rewards_now))
    if rollout_dict[spec.id]['dones'] != dones_now:
        errors.append(
            'Dones not equal for {} -- expected {} but got {}'.format(
                spec.id, rollout_dict[spec.id]['dones'], dones_now))
    if len(errors):
        for error in errors:
            logger.warn(error)
        raise ValueError(errors)
Beispiel #11
0
def deprecated_warn_once(text):
    global warn_once
    if not warn_once: return
    warn_once = False
    logger.warn(text)
Beispiel #12
0
    def _start(self,
               directory,
               video_callable=None,
               force=False,
               resume=False,
               write_upon_reset=False,
               uid=None,
               mode=None):
        """Start monitoring.

        Args:
            directory (str): A per-training run directory where to record stats.
            video_callable (Optional[function, False]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (for video_callable is None) is to take perfect cubes, capped at 1000. False disables video recording.
            force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "sciencegym.").
            resume (bool): Retain the training data already in this directory, which will be merged with our new data
            write_upon_reset (bool): Write the manifest file on each reset. (This is currently a JSON file, so writing it is somewhat expensive.)
            uid (Optional[str]): A unique id used as part of the suffix for the file. By default, uses os.getpid().
            mode (['evaluation', 'training']): Whether this is an evaluation or training episode.
        """
        if self.env.spec is None:
            logger.warn(
                "Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'scigym.make', and is recommended only for advanced users."
            )
            env_id = '(unknown)'
        else:
            env_id = self.env.spec.id

        if not os.path.exists(directory):
            logger.info('Creating monitor directory %s', directory)
            if six.PY3:
                os.makedirs(directory, exist_ok=True)
            else:
                os.makedirs(directory)

        if video_callable is None:
            video_callable = capped_cubic_video_schedule
        elif video_callable == False:
            video_callable = disable_videos
        elif not callable(video_callable):
            raise error.Error(
                'You must provide a function, None, or False for video_callable, not {}: {}'
                .format(type(video_callable), video_callable))
        self.video_callable = video_callable

        # Check on whether we need to clear anything
        if force:
            clear_monitor_files(directory)
        elif not resume:
            training_manifests = detect_training_manifests(directory)
            if len(training_manifests) > 0:
                raise error.Error(
                    '''Trying to write to monitor directory {} with existing monitor files: {}.

 You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''
                    .format(directory, ', '.join(training_manifests[:5])))

        self._monitor_id = monitor_closer.register(self)

        self.enabled = True
        self.directory = os.path.abspath(directory)
        # We use the 'sci-gym' prefix to determine if a file is
        # ours
        self.file_prefix = FILE_PREFIX
        self.file_infix = '{}.{}'.format(self._monitor_id,
                                         uid if uid else os.getpid())

        self.stats_recorder = stats_recorder.StatsRecorder(
            directory,
            '{}.episode_batch.{}'.format(self.file_prefix, self.file_infix),
            autoreset=self.env_semantics_autoreset,
            env_id=env_id)

        if not os.path.exists(directory): os.mkdir(directory)
        self.write_upon_reset = write_upon_reset

        if mode is not None:
            self._set_mode(mode)