def step(self, action):
    """Advance the environment by one time step.

    ``action`` is unpacked as ``(inp_act, out_act, pred)``:
    ``inp_act`` is handed to ``self._move``, ``out_act == 1`` requests a
    write, and ``pred`` is compared with the target entry under the write
    head.

    Rewards: ``1.0`` for a correct write, ``-0.5`` for a wrong one (which
    also ends the episode), ``-1.0`` once ``self.time`` exceeds
    ``self.time_limit``, and ``0.0`` otherwise.

    Returns:
        tuple: ``(obs, reward, done, info)``; ``info`` is always ``{}``.
    """
    assert self.action_space.contains(action)
    self.last_action = action
    inp_act, out_act, pred = action
    reward, done = 0.0, False
    self.time += 1
    assert 0 <= self.write_head_position

    if out_act == 1:
        try:
            correct = pred == self.target[self.write_head_position]
        except IndexError:
            # The write head ran past the target: the caller kept stepping
            # after the episode had finished.
            logger.warn(
                "It looks like you're calling step() even though this "
                "environment has already returned done=True. You should always call "
                "reset() once you receive done=True. Any further steps are undefined "
                "behaviour.")
            correct = False
        # Bail as soon as a wrong character is written to the tape
        reward, done = (1.0, False) if correct else (-0.5, True)
        self.write_head_position += 1
        if self.write_head_position >= len(self.target):
            done = True

    self._move(inp_act)

    # Running out of time overrides whatever reward was earned this step.
    if self.time > self.time_limit:
        reward, done = -1.0, True

    obs = self._get_obs()
    self.last_reward = reward
    self.episode_total_reward += reward
    return (obs, reward, done, {})
def _encode_image_frame(self, frame): if not self.encoder: self.encoder = ImageEncoder(self.path, frame.shape, self.frames_per_sec) self.metadata['encoder_version'] = self.encoder.version_info try: self.encoder.capture_frame(frame) except error.InvalidFrame as e: logger.warn( 'Tried to pass invalid video frame, marking as broken: %s', e) self.broken = True else: self.empty = False
def should_skip_env_spec_for_tests(spec):
    """Return True when the tests should skip the environment ``spec``.

    Envs that require extra dependencies or are otherwise troublesome to
    run frequently are skipped. Currently that is any spec whose entry
    point mentions ``GoEnv`` or ``HexEnv``; a warning is logged for each
    skipped env.
    """
    entry_point = spec._entry_point

    # Disabled check: skip mujoco tests for pull request CI
    # skip_mujoco = not (os.environ.get('MUJOCO_KEY'))
    # if skip_mujoco and (ep.startswith('scigym.envs.mujoco:') or ep.startswith('scigym.envs.robotics:')):
    #     return True

    # Disabled check: atari envs other than Pong / Seaquest
    # (ep.startswith("scigym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest"))
    troublesome = ('GoEnv', 'HexEnv')
    if not any(marker in entry_point for marker in troublesome):
        return False

    logger.warn("Skipping tests for env {}".format(entry_point))
    return True
def step(self, action):
    """Apply ``action`` for one time step of length ``self.tau``.

    A force of ``+self.force_mag`` (``action == 1``) or ``-self.force_mag``
    (any other valid action) is applied, the accelerations are computed,
    and the state ``(x, x_dot, theta, theta_dot)`` is integrated with
    either explicit Euler (``kinematics_integrator == 'euler'``) or
    semi-implicit Euler.

    The episode is done once ``x`` or ``theta`` leaves its threshold range.
    The reward is ``1.0`` on every step up to and including the first
    ``done`` step, and ``0.0`` on any step taken after that.

    Returns:
        tuple: ``(np.array(state), reward, done, {})``.
    """
    assert self.action_space.contains(
        action), "%r (%s) invalid" % (action, type(action))

    x, x_dot, theta, theta_dot = self.state
    if action == 1:
        force = self.force_mag
    else:
        force = -self.force_mag

    costheta = math.cos(theta)
    sintheta = math.sin(theta)
    temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
    thetaacc = (self.gravity * sintheta - costheta * temp) / (
        self.length * (4.0 / 3.0 - self.masspole * costheta * costheta / self.total_mass))
    xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass

    if self.kinematics_integrator != 'euler':
        # semi-implicit euler: velocities first, then positions
        x_dot = x_dot + self.tau * xacc
        x = x + self.tau * x_dot
        theta_dot = theta_dot + self.tau * thetaacc
        theta = theta + self.tau * theta_dot
    else:
        # explicit euler: positions use the previous velocities
        x = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
    self.state = (x, x_dot, theta, theta_dot)

    out_of_bounds = (
        x < -self.x_threshold
        or x > self.x_threshold
        or theta < -self.theta_threshold_radians
        or theta > self.theta_threshold_radians
    )
    done = bool(out_of_bounds)

    reward = 1.0
    if done:
        if self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

    return np.array(self.state), reward, done, {}
def seed(self, seed=None):
    """Set the seed for this env's random number generator(s).

    This implementation seeds nothing: it logs a warning that the
    environment could not be seeded and returns ``None``.

    Note:
        Some environments use multiple pseudorandom number generators.
        All seeds in use should be captured so that there are no
        accidental correlations between generators.

    Returns:
        list<bigint>: The documented contract is the list of seeds used by
        this env's random number generators, with the "main" seed first
        (the value a reproducer should pass to 'seed'). The main seed
        often equals the provided 'seed', but not when seed=None, for
        example. This implementation itself returns ``None``.
    """
    logger.warn("Could not seed environment %s", self)
    return None
def update_rollout_dict(spec, rollout_dict):
    """Generate the rollout hashes for ``spec`` and store them in ``rollout_dict``.

    Nothing is written when the spec is skipped by
    ``should_skip_env_spec_for_tests``, when the env is nondeterministic,
    when generating the rollout raises, or when the freshly generated
    hashes equal the stored ones.

    Args:
        spec: Environment spec for which the rollout is generated.
        rollout_dict (dict): Existing rollouts keyed by ``spec.id``;
            updated in place.

    Returns:
        bool: True iff the dictionary was modified.
    """
    # Skip platform-dependent
    if should_skip_env_spec_for_tests(spec):
        logger.info("Skipping tests for {}".format(spec.id))
        return False

    # Skip environments that are nondeterministic
    if spec.nondeterministic:
        logger.info("Skipping tests for nondeterministic env {}".format(spec.id))
        return False

    logger.info("Generating rollout for {}".format(spec.id))

    try:
        observations_hash, actions_hash, rewards_hash, dones_hash = generate_rollout_hash(spec)
    except Exception as exc:
        # If running the env generates an exception, don't write to the
        # rollout file. Only Exception is caught so that KeyboardInterrupt
        # and SystemExit still stop the run instead of being swallowed.
        logger.warn("Exception {} thrown while generating rollout for {}. Rollout not added.".format(type(exc), spec.id))
        return False

    rollout = {
        'observations': observations_hash,
        'actions': actions_hash,
        'rewards': rewards_hash,
        'dones': dones_hash,
    }

    existing = rollout_dict.get(spec.id)
    if existing:
        # A stored rollout that lacks one of the keys counts as differing
        # and is overwritten rather than raising KeyError.
        differs = any(
            existing.get(key) != new_hash
            for key, new_hash in rollout.items()
        )
        if not differs:
            logger.debug("Hashes match with existing for {}".format(spec.id))
            return False
        logger.warn("Got new hash for {}. Overwriting.".format(spec.id))

    rollout_dict[spec.id] = rollout
    return True
def patch_deprecated_methods(env):
    """Expose the legacy underscore methods of ``env`` under their new names.

    Methods were renamed from '_method' to 'method', render() no longer has
    a 'close' parameter, and close is a separate method. For backward
    compatibility, this makes it possible to work with unmodified
    environments by attaching ``reset``, ``step``, ``seed``, ``render`` and
    ``close`` to the instance.

    A deprecation warning is logged the first time this runs while the
    module-level ``warn_once`` flag is set; the flag is then cleared.

    Args:
        env: Environment instance that provides ``_reset``, ``_step``,
            ``_seed`` and ``_render(mode, close=...)``.
    """
    global warn_once
    if warn_once:
        logger.warn(
            "Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior."
            % str(type(env)))
        warn_once = False

    env.reset = env._reset
    env.step = env._step
    env.seed = env._seed

    def render(mode='human'):
        # Default to 'human', the mode close() below also uses, so that a
        # bare env.render() works on patched environments.
        return env._render(mode, close=False)

    def close():
        env._render("human", close=True)

    env.render = render
    env.close = close
def capture_frame(self):
    """Render ``self.env`` once and add the resulting frame to the video.

    Does nothing when the recorder is not functional. The env is rendered
    in 'ansi' mode when ``self.ansi_mode`` is set and in 'rgb_array' mode
    otherwise. A ``None`` frame is ignored in async mode; otherwise it
    marks the recorder as broken.
    """
    if not self.functional:
        return
    logger.debug('Capturing video frame: path=%s', self.path)

    if self.ansi_mode:
        render_mode = 'ansi'
    else:
        render_mode = 'rgb_array'
    frame = self.env.render(mode=render_mode)

    if frame is not None:
        self.last_frame = frame
        encode = self._encode_ansi_frame if self.ansi_mode else self._encode_image_frame
        encode(frame)
        return

    if self._async:
        return

    # Indicates a bug in the environment: don't want to raise
    # an error here.
    logger.warn(
        'Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s',
        self.path, self.metadata_path)
    self.broken = True
def __init__(self, low=None, high=None, shape=None, dtype=None):
    """Build the box from its bounds.

    Two kinds of valid input:
        Box(low=-1.0, high=1.0, shape=(3,4)) # low and high are scalars, and shape is provided
        Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0])) # low and high are arrays of the same shape

    When ``dtype`` is omitted it is autodetected (``np.uint8`` if every
    upper bound equals 255, ``np.float32`` otherwise) and a warning asks
    the caller to pass it explicitly.
    """
    if shape is not None:
        # Scalar bounds are broadcast to the requested shape.
        assert np.isscalar(low) and np.isscalar(high)
        low = low + np.zeros(shape)
        high = high + np.zeros(shape)
    else:
        # Array bounds define the shape themselves.
        assert low.shape == high.shape
        shape = low.shape

    if dtype is None:  # Autodetect type
        dtype = np.uint8 if (high == 255).all() else np.float32
        logger.warn(
            "scigym.spaces.Box autodetected dtype as {}. Please provide explicit dtype."
            .format(dtype))

    self.low = low.astype(dtype)
    self.high = high.astype(dtype)
    scigym.Space.__init__(self, shape, dtype)
def test_env_semantics(spec):
    """Compare a fresh rollout of ``spec`` with the stored rollout hashes.

    The check is currently switched off: the function logs a warning and
    returns immediately, so everything after the early ``return`` is
    unreachable.
    """
    logger.warn(
        "Skipping this test. Existing hashes were generated in a bad way")
    return

    # ---- unreachable while the early return above is in place ----
    with open(ROLLOUT_FILE) as data_file:
        rollout_dict = json.load(data_file)

    if spec.id not in rollout_dict:
        if not spec.nondeterministic:
            logger.warn(
                "Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs"
                .format(spec.id))
        return

    logger.info("Testing rollout for {} environment...".format(spec.id))

    observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(
        spec)

    expected = rollout_dict[spec.id]
    checks = (
        ('Observations not equal for {} -- expected {} but got {}',
         'observations', observations_now),
        ('Actions not equal for {} -- expected {} but got {}',
         'actions', actions_now),
        ('Rewards not equal for {} -- expected {} but got {}',
         'rewards', rewards_now),
        ('Dones not equal for {} -- expected {} but got {}',
         'dones', dones_now),
    )
    errors = [
        template.format(spec.id, expected[key], current)
        for template, key, current in checks
        if expected[key] != current
    ]

    if len(errors):
        for message in errors:
            logger.warn(message)
        raise ValueError(errors)
def deprecated_warn_once(text):
    """Log ``text`` as a warning, but only while the ``warn_once`` flag is set.

    The module-level ``warn_once`` flag is cleared before the warning is
    emitted, so later calls return without logging.
    """
    global warn_once
    if warn_once:
        warn_once = False
        logger.warn(text)
def _start(self,
           directory,
           video_callable=None,
           force=False,
           resume=False,
           write_upon_reset=False,
           uid=None,
           mode=None):
    """Start monitoring.

    Args:
        directory (str): A per-training run directory where to record
            stats. It is created when it does not exist yet.
        video_callable (Optional[function, False]): function that takes in
            the index of the episode and outputs a boolean, indicating
            whether we should record a video on this episode. The default
            (for video_callable is None) is to take perfect cubes, capped
            at 1000. False disables video recording.
        force (bool): Clear out existing training data from this directory
            (done through ``clear_monitor_files``).
        resume (bool): Retain the training data already in this directory,
            which will be merged with our new data.
        write_upon_reset (bool): Write the manifest file on each reset.
            (This is currently a JSON file, so writing it is somewhat
            expensive.)
        uid (Optional[str]): A unique id used as part of the suffix for
            the file. By default, uses os.getpid().
        mode (['evaluation', 'training']): Whether this is an evaluation
            or training episode.

    Raises:
        error.Error: If ``video_callable`` is neither callable, None nor
            False, or if the directory already holds training manifests
            and neither ``force`` nor ``resume`` is set.
    """
    spec = self.env.spec
    if spec is None:
        logger.warn(
            "Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'scigym.make', and is recommended only for advanced users."
        )
        env_id = '(unknown)'
    else:
        env_id = spec.id

    if not os.path.exists(directory):
        logger.info('Creating monitor directory %s', directory)
        # exist_ok is only passed on Python 3.
        makedirs_kwargs = {'exist_ok': True} if six.PY3 else {}
        os.makedirs(directory, **makedirs_kwargs)

    if video_callable is None:
        video_callable = capped_cubic_video_schedule
    elif video_callable == False:  # noqa: E712 - equality, so 0 matches as well
        video_callable = disable_videos
    elif not callable(video_callable):
        raise error.Error(
            'You must provide a function, None, or False for video_callable, not {}: {}'
            .format(type(video_callable), video_callable))
    self.video_callable = video_callable

    # Check on whether we need to clear anything
    if force:
        clear_monitor_files(directory)
    elif not resume:
        training_manifests = detect_training_manifests(directory)
        if len(training_manifests) > 0:
            raise error.Error(
                '''Trying to write to monitor directory {} with existing monitor files: {}. You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''
                .format(directory, ', '.join(training_manifests[:5])))

    self._monitor_id = monitor_closer.register(self)

    self.enabled = True
    self.directory = os.path.abspath(directory)
    # FILE_PREFIX is the prefix used to determine if a file is ours
    self.file_prefix = FILE_PREFIX
    self.file_infix = '{}.{}'.format(self._monitor_id, uid or os.getpid())

    batch_prefix = '{}.episode_batch.{}'.format(self.file_prefix,
                                                self.file_infix)
    self.stats_recorder = stats_recorder.StatsRecorder(
        directory,
        batch_prefix,
        autoreset=self.env_semantics_autoreset,
        env_id=env_id)

    if not os.path.exists(directory):
        os.mkdir(directory)
    self.write_upon_reset = write_upon_reset

    if mode is not None:
        self._set_mode(mode)