def _reset_sim(self):
        """Reset the simulation with a randomly chosen, randomly posed target.

        Picks a target object index, restores the saved initial simulator
        state, optionally perturbs the target's rotation and position and the
        mocap body's rotation, writes the resulting poses back into the
        simulation, and finally steps it a few times with zero actions.

        Returns:
            bool: ``True`` when all settle steps ran, ``False`` if MuJoCo
            raised an exception while stepping.
        """
        # Choose the target index from one of three bands: 30% from 1..9,
        # 35% from 11..19 and 35% from 21..29.
        # NOTE(review): randrange excludes its upper bound, so 10, 20 and 30
        # are never selected -- confirm that this is intended.
        i = random.uniform(0, 1)
        if i > 0.30:
            if i > 0.65:
                self.cur_target = random.randrange(21, 30)
            else:
                self.cur_target = random.randrange(11, 20)
        else:
            self.cur_target = random.randrange(1, 10)

        # Every '_' in the template is replaced by the chosen index.
        self.target_body = self.target_body_template.replace(
            '_', str(self.cur_target))
        self.off_ground_count = 0
        self.t = 0
        self.sim.set_state(self.initial_state)
        self.sim.forward()

        self.sim.data.ctrl[:] = 0.

        # The joint qpos is a 7-vector: 3 position values then 4 quaternion
        # values (checked by the asserts below).
        initial_qpos = self.sim.data.get_joint_qpos(self.target_body).copy()
        initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:]
        initial_mocap_pos = self.sim.data.mocap_pos[0]
        initial_mocap_quat = np.array([0, 0, 0, 1])

        assert initial_qpos.shape == (7, )
        assert initial_pos.shape == (3, )
        assert initial_quat.shape == (4, )
        assert initial_mocap_pos.shape == (3, )
        assert initial_mocap_quat.shape == (4, )

        # Randomization initial rotation for target object and mocap body
        if self.randomize_initial_rotation:
            if self.target_rotation == 'z':
                angle = self.np_random.uniform(-np.pi / 10, np.pi / 10)
                axis = np.array([0., 0., 1.])
                offset_quat = quat_from_angle_and_axis(angle, axis)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation == 'parallel':
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = np.array([0., 0., 1.])
                z_quat = quat_from_angle_and_axis(angle, axis)
                parallel_quat = self.parallel_quats[self.np_random.randint(
                    len(self.parallel_quats))]
                offset_quat = rotations.quat_mul(z_quat, parallel_quat)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation in ['xyz', 'ignore']:
                # Sample over the full [-pi, pi) range. The previous bounds
                # (pi, pi) were degenerate and always produced exactly pi.
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = self.np_random.uniform(-1., 1., size=3)
                offset_quat = quat_from_angle_and_axis(angle, axis)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation == 'fixed':
                pass
            else:
                raise error.Error(
                    'Unknown target_rotation option "{}".'.format(
                        self.target_rotation))

            # Small random tilt of the mocap body about the y axis.
            mocap_angle_y = self.np_random.uniform(-np.pi / 30., np.pi / 30.)
            mocap_axis_y = np.array([0., 1., 0.])
            mocap_offset_quat_y = quat_from_angle_and_axis(
                mocap_angle_y, mocap_axis_y)
            initial_mocap_quat = rotations.quat_mul(initial_mocap_quat,
                                                    mocap_offset_quat_y)

        # Randomize initial position.
        # NOTE(review): this draws from the global np.random rather than
        # self.np_random, unlike the rotation code above -- confirm whether
        # seeded reproducibility is expected here.
        if self.randomize_initial_position:
            if self.target_position != 'fixed':
                initial_pos += np.array([
                    np.random.uniform(self.target_position_range[0][0],
                                      self.target_position_range[0][1]),
                    np.random.uniform(self.target_position_range[1][0],
                                      self.target_position_range[1][1]),
                    np.random.uniform(self.target_position_range[2][0],
                                      self.target_position_range[2][1])
                ])

        self.init_object_pos = initial_pos
        # Re-normalize so the composed quaternion is a unit quaternion.
        initial_quat /= np.linalg.norm(initial_quat)
        initial_qpos = np.concatenate([initial_pos, initial_quat])
        self.sim.data.set_joint_qpos(self.target_body, initial_qpos)

        # set mocap body at a fixed offset from the target object
        self.sim.data.mocap_pos[:] = self.init_object_pos + np.array(
            [0.00, -0.17, 0.06])
        self.sim.data.mocap_quat[:] = initial_mocap_quat.copy()

        # Run the simulation for a bunch of timesteps to let everything settle in.
        for _ in range(10):
            self._set_action(np.zeros(self.n_actions))
            try:
                self.sim.step()
            except mujoco_py.MujocoException:
                return False
        return True
Exemple #2
0
    def __init__(self,
                 env,
                 path=None,
                 metadata=None,
                 enabled=True,
                 base_path=None):
        """Prepare a recorder that captures rendered output of ``env``.

        Args:
            env: Environment to record; its ``metadata`` dict is consulted
                for the supported render modes and the frame rate.
            path: Full output path. It must end in the required extension
                ('.mp4', or '.json' when only the "ansi" mode is available).
            metadata: Optional dict of extra metadata. A 'content_type'
                entry is set on it.
            enabled: When False the recorder is left inert and only
                ``enabled`` / ``last_frame`` / ``ansi_mode`` are set.
            base_path: Output path without extension; the required extension
                is appended. Mutually exclusive with ``path``.

        Raises:
            error.Error: If both ``path`` and ``base_path`` are supplied, or
                if the resulting path has the wrong extension.
        """
        supported_modes = env.metadata.get('render.modes', [])
        self.ansi_mode = False
        if 'rgb_array' not in supported_modes:
            if 'ansi' not in supported_modes:
                # Neither usable mode is available: silently turn recording off.
                logger.info(
                    'Disabling video recorder because {} neither supports video mode "rgb_array" nor "ansi".'
                    .format(env))
                enabled = False
            else:
                self.ansi_mode = True

        if not (path is None or base_path is None):
            raise error.Error(
                "You can pass at most one of `path` or `base_path`.")

        self.enabled = enabled
        self.last_frame = None
        if not self.enabled:
            return

        self.env = env

        if self.ansi_mode:
            required_ext = '.json'
        else:
            required_ext = '.mp4'

        if path is None:
            if base_path is None:
                # No location requested at all: reserve a unique temp file.
                with tempfile.NamedTemporaryFile(suffix=required_ext,
                                                 delete=False) as tmp_file:
                    path = tmp_file.name
            else:
                # Only the stem was given; complete it with the extension.
                path = base_path + required_ext
        self.path = path

        path_base, actual_ext = os.path.splitext(self.path)

        if actual_ext != required_ext:
            hint = ''
            if self.ansi_mode:
                hint = " HINT: The environment is text-only, therefore we're recording its text output in a structured JSON format."
            raise error.Error(
                "Invalid path given: {} -- must have file extension {}.{}".
                format(self.path, required_ext, hint))

        # Make sure the output file exists regardless of platform (the
        # encoder pre-creates it on some systems but not on others).
        touch(path)

        self.frames_per_sec = env.metadata.get('video.frames_per_second', 30)
        self.encoder = None  # the encoder is started lazily
        self.broken = False

        # Record the content type and dump the metadata.
        self.metadata = metadata or {}
        if self.ansi_mode:
            content_type = 'video/vnd.openai.ansivid'
        else:
            content_type = 'video/mp4'
        self.metadata['content_type'] = content_type
        self.metadata_path = '{}.meta.json'.format(path_base)
        self.write_metadata()

        logger.info('Starting new video recorder writing to %s', self.path)
        self.empty = True
Exemple #3
0
def upload(training_dir, algorithm_id=None, writeup=None, api_key=None):
    """Upload the results of training (as automatically recorded by your
    env's monitor) to OpenAI Gym.

    Args:
        training_dir (Optional[str]): A directory containing the results of a training run.
        algorithm_id (Optional[str]): An arbitrary string indicating the particular version of the algorithm (including choices of parameters) you are running.
        writeup (Optional[str]): A Gist URL (of the form https://gist.github.com/<user>/<id>) containing your writeup for this evaluation.
        api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment variable (OPENAI_GYM_API_KEY).

    Returns:
        The evaluation object returned by ``resource.Evaluation.create``.

    Raises:
        error.Error: If a monitor is still open, or if ``training_dir``
            yielded neither an episode batch nor a training video.
    """

    open_monitors = list(monitoring._monitors.values())
    if open_monitors:
        envs = [
            m.env.spec.id if m.env.spec else '(unknown)' for m in open_monitors
        ]
        raise error.Error(
            "Still have an open monitor on {}. You must run 'env.monitor.close()' before uploading."
            .format(', '.join(envs)))

    env_info, training_episode_batch, training_video = upload_training_data(
        training_dir, api_key=api_key)
    env_id = env_info['env_id']
    training_episode_batch_id = training_video_id = None
    if training_episode_batch:
        training_episode_batch_id = training_episode_batch.id
    if training_video:
        training_video_id = training_video.id

    # Work out what was recorded. This validation runs regardless of the
    # logger level, so an evaluation with no data is never created.
    if training_episode_batch_id is not None and training_video_id is not None:
        recorded = 'learning curve and training video'
    elif training_episode_batch_id is not None:
        recorded = 'learning curve'
    elif training_video_id is not None:
        recorded = 'training video'
    else:
        # Uses str.format placeholders throughout (the earlier message mixed
        # '%s' with '{}', which mis-rendered both values).
        raise error.Error(
            "[{}] You didn't have any recorded training data in {}. Once you've used 'env.monitor.start(training_dir)' to start recording, you need to actually run some rollouts. Please join the community chat on https://gym.openai.com if you have any issues."
            .format(env_id, training_dir))

    # logger.info filters on the logger's level itself, so no explicit
    # level check is needed.
    logger.info('[%s] Creating evaluation object from %s with %s', env_id,
                training_dir, recorded)

    evaluation = resource.Evaluation.create(
        training_episode_batch=training_episode_batch_id,
        training_video=training_video_id,
        env=env_id,
        algorithm={
            'id': algorithm_id,
        },
        writeup=writeup,
        gym_version=env_info['gym_version'],
        api_key=api_key,
    )

    logger.info(
        """
****************************************************
You successfully uploaded your evaluation on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
    """.rstrip(), env_id, evaluation.web_url())

    return evaluation
Exemple #4
0
    def player_move(self,
                    table_state,
                    action,
                    last_seq_move=None,
                    _round=None):
        """Validate an action against the table state and turn it into a move.

        Args:
            table_state: Mapping read with ``.get`` for 'bigblind' and
                'tocall'; also passed to ``update_localstate``.
            action: Two-element mutable sequence ``[action_idx, raise_amount]``.
                When ``action[0] == 2`` its second element is overwritten in
                place with a fixed bet size (25, 40 or 50).
            last_seq_move: Passed to ``self.count_r``.
                NOTE(review): presumably the betting sequence so far, with
                count_r counting raises -- confirm.
            _round: Round index; ``0`` selects the position-dependent sizing.

        Returns:
            tuple: One of ``('raise', amount)``, ``('check', 0)``,
            ``('call', tocall)`` or ``('fold', -1)``.

        Raises:
            AssertionError: If nothing is owed and the action is neither
                check nor raise.
            error.Error: If the action is invalid when something is owed, or
                a raise is below ``minraise``.
        """
        self.update_localstate(table_state)
        # NOTE(review): bigblind is read but never used below.
        bigblind = table_state.get('bigblind')
        # The amount to call is capped at the player's stack.
        tocall = min(table_state.get('tocall', 0), self.stack)
        # Minimum raise is pinned to 0; the previous lookup
        # (table_state.get('minraise', 0) - 10) is disabled.
        minraise = 0
        # First parse: fails early if the action is malformed.
        [action_idx, raise_amount] = action
        raise_amount = int(raise_amount)
        action_idx = int(action_idx)
        # Action code 2 (raise, per the error messages below): replace the
        # requested amount with a fixed bet size.
        if action[0] == 2:
            if _round == 0:
                if self.position == 0 and self.count_r(last_seq_move) == 0:
                    action[1] = 40
                elif self.position == 2:
                    action[1] = 50 if self.count_r(last_seq_move) > 0 else 25
                # Separate `if`, not `elif`: this can overwrite the value
                # chosen just above.
                if self.get_seat() == 2 and self.count_r(last_seq_move) > 0:
                    action[1] = 50 if self.count_r(last_seq_move) > 1 else 25
            else:

                action[1] = 50 if self.count_r(last_seq_move) > 0 else 25

        if (self.count_r(last_seq_move)) > 1 or _round != 0:

            # NOTE(review): `to_call` is assigned but never read; it may have
            # been meant to update `tocall` -- confirm before changing.
            to_call = 25 * (self.count_r(last_seq_move))

        # Second parse: picks up the amount possibly rewritten above.
        [action_idx, raise_amount] = action
        raise_amount = int(raise_amount)
        action_idx = int(action_idx)

        if tocall == 0:
            # Nothing owed: only check or raise is accepted. Because of this
            # assert, the final `else` of this branch is only reachable when
            # assertions are disabled.
            assert action_idx in [Player.CHECK, Player.RAISE]
            if action_idx == Player.RAISE:
                if raise_amount < minraise:
                    raise error.Error(
                        'raise must be greater than minraise {}'.format(
                            minraise))
                # A raise larger than the stack is capped at the stack.
                if raise_amount > self.stack:
                    raise_amount = self.stack
                move_tuple = ('raise', raise_amount)
            elif action_idx == Player.CHECK:
                move_tuple = ('check', 0)
            else:
                raise error.Error(
                    'invalid action ({}) must be check (0) or raise (2)'.
                    format(action_idx))
        else:
            # Something is owed: raise, call or fold are accepted.
            if action_idx not in [Player.RAISE, Player.CALL, Player.FOLD]:
                raise error.Error(
                    'invalid action ({}) must be raise (2), call (1), or fold (3)'
                    .format(action_idx))
            if action_idx == Player.RAISE:
                if raise_amount < minraise:
                    raise error.Error(
                        'raise must be greater than minraise {}'.format(
                            minraise))
                # A raise larger than the stack is capped at the stack.
                if raise_amount > self.stack:
                    raise_amount = self.stack
                move_tuple = ('raise', raise_amount)
            elif action_idx == Player.CALL:
                move_tuple = ('call', tocall)
            elif action_idx == Player.FOLD:
                move_tuple = ('fold', -1)
            else:
                # Unreachable after the membership check above; kept as a guard.
                raise error.Error(
                    'invalid action ({}) must be raise (2), call (1), or fold (3)'
                    .format(action_idx))
        return move_tuple
Exemple #5
0
def upload(training_dir,
           algorithm_id=None,
           writeup=None,
           benchmark_id=None,
           api_key=None,
           ignore_open_monitors=False):
    """Upload recorded training results to OpenAI Gym.

    Without ``benchmark_id`` a single evaluation is uploaded from
    ``training_dir``. With ``benchmark_id`` the directory tree is walked for
    training manifests, the discovered env ids are checked against the
    benchmark spec, a benchmark run is created, and every discovered
    directory is uploaded as part of that run.

    Args:
        training_dir (Optional[str]): A directory containing the results of a training run.
        algorithm_id (Optional[str]): An algorithm id indicating the particular version of the algorithm (including choices of parameters) you are running (visit https://gym.openai.com/algorithms to create an id). If the id doesn't match an existing server id it will create a new algorithm using algorithm_id as the name
        writeup (Optional[str]): A Gist URL (of the form https://gist.github.com/<user>/<id>) containing your writeup for this evaluation.
        benchmark_id (Optional[str]): The benchmark that these evaluations belong to. Will recursively search through training_dir for any Gym manifests. This feature is currently pre-release.
        api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment variable (OPENAI_GYM_API_KEY).
        ignore_open_monitors (bool): Forwarded unchanged to ``_upload``.

    Returns:
        The benchmark run id for a benchmark upload, otherwise ``None``.

    Raises:
        error.Error: If ``benchmark_id`` is not registered, or the
            evaluations found do not match the benchmark spec.
    """

    if not benchmark_id:
        # Single evaluation upload: no benchmark run to attach to.
        evaluation = _upload(training_dir, algorithm_id, writeup, None,
                             api_key, ignore_open_monitors)

        logger.info(
            """
****************************************************
You successfully uploaded your evaluation on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip(), evaluation.env, evaluation.web_url())

        return None

    # Benchmark upload: collect every directory that holds manifests.
    found_dirs = []
    found_env_ids = []
    for dirpath, _, filenames in os.walk(training_dir):
        manifests = monitoring.detect_training_manifests(dirpath,
                                                         files=filenames)
        if not manifests:
            continue
        env_info = monitoring.load_env_info_from_manifests(
            manifests, training_dir)
        found_env_ids.append(env_info['env_id'])
        found_dirs.append(dirpath)

    # Look the benchmark up so the findings can be validated against it.
    try:
        spec = benchmark_spec(benchmark_id)
    except error.UnregisteredBenchmark:
        raise error.Error(
            "Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?"
            .format(benchmark_id))

    # One expected entry per trial of every task in the spec.
    expected_env_ids = []
    for task in spec.tasks:
        for _ in range(task.trials):
            expected_env_ids.append(task.env_id)

    # This could be more stringent about mixing evaluations
    if sorted(found_env_ids) != sorted(expected_env_ids):
        raise error.Error(
            "Evaluations do not match spec for benchmark {}. In {}, we found evaluations for {}, expected {}"
            .format(benchmark_id, training_dir, sorted(found_env_ids),
                    sorted(expected_env_ids)))

    benchmark_run = resource.BenchmarkRun.create(benchmark_id=benchmark_id,
                                                 algorithm_id=algorithm_id)
    benchmark_run_id = benchmark_run.id

    # Upload each directory; algorithm_id is deliberately not propagated to
    # the individual evaluations of a benchmark run.
    for run_dir in found_dirs:
        _upload(run_dir, None, writeup, benchmark_run_id, api_key,
                ignore_open_monitors)

    logger.info(
        """
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip(), benchmark_id, benchmark_run.web_url())

    return benchmark_run_id
Exemple #6
0
 def register(self, id, **kwargs):
     """Create an ``EnvSpec`` for ``id`` and store it; an id may be registered only once.

     Raises:
         error.Error: If ``id`` is already present in ``self.env_specs``.
     """
     already_registered = id in self.env_specs
     if already_registered:
         raise error.Error('Cannot re-register id: {}'.format(id))
     new_spec = EnvSpec(id, **kwargs)
     self.env_specs[id] = new_spec
Exemple #7
0
    def __init__(self,
                 env,
                 path=None,
                 metadata=None,
                 enabled=True,
                 base_path=None):
        """Prepare a recorder that captures rendered output of ``env``.

        Args:
            env: Environment to record; its ``metadata`` dict supplies the
                render modes, the async flag and the frame rates.
            path: Full output path. It must end in the required extension
                (".mp4", or ".json" when only the "ansi" mode is available).
            metadata: Optional dict of extra metadata. A "content_type"
                entry is set on it.
            enabled: When False nothing beyond ``_async``, ``enabled`` and
                ``_closed`` is initialised.
            base_path: Output path without extension; the required extension
                is appended. Mutually exclusive with ``path``.

        Raises:
            error.Error: If both ``path`` and ``base_path`` are supplied, or
                if the resulting path has the wrong extension.
        """
        supported_modes = env.metadata.get("render.modes", [])
        self._async = env.metadata.get("semantics.async")
        self.enabled = enabled
        self._closed = False

        # A disabled recorder needs no further state.
        if not self.enabled:
            return

        self.ansi_mode = False
        if "rgb_array" not in supported_modes:
            if "ansi" not in supported_modes:
                logger.info(
                    f'Disabling video recorder because {env} neither supports video mode "rgb_array" nor "ansi".'
                )
                # The environment cannot be recorded after all.
                self.enabled = False
                return
            self.ansi_mode = True

        if not (path is None or base_path is None):
            raise error.Error(
                "You can pass at most one of `path` or `base_path`.")

        self.last_frame = None
        self.env = env

        if self.ansi_mode:
            required_ext = ".json"
        else:
            required_ext = ".mp4"

        if path is None:
            if base_path is None:
                # No location requested at all: reserve a unique temp file.
                with tempfile.NamedTemporaryFile(suffix=required_ext,
                                                 delete=False) as tmp_file:
                    path = tmp_file.name
            else:
                # Only the stem was given; complete it with the extension.
                path = base_path + required_ext
        self.path = path

        path_base, actual_ext = os.path.splitext(self.path)

        if actual_ext != required_ext:
            hint = ""
            if self.ansi_mode:
                hint = " HINT: The environment is text-only, therefore we're recording its text output in a structured JSON format."
            raise error.Error(
                f"Invalid path given: {self.path} -- must have file extension {required_ext}.{hint}"
            )

        # Make sure the output file exists regardless of platform (the
        # encoder pre-creates it on some systems but not on others).
        touch(path)

        input_fps = env.metadata.get("video.frames_per_second", 30)
        self.frames_per_sec = input_fps
        # The output rate falls back to the input rate when not specified.
        self.output_frames_per_sec = env.metadata.get(
            "video.output_frames_per_second", input_fps)
        self.encoder = None  # the encoder is started lazily
        self.broken = False

        # Record the content type and dump the metadata.
        self.metadata = metadata or {}
        if self.ansi_mode:
            content_type = "video/vnd.openai.ansivid"
        else:
            content_type = "video/mp4"
        self.metadata["content_type"] = content_type
        self.metadata_path = f"{path_base}.meta.json"
        self.write_metadata()

        logger.info("Starting new video recorder writing to %s", self.path)
        self.empty = True
Exemple #8
0
    def start(self,
              directory,
              video_callable=None,
              force=False,
              resume=False,
              seed=None,
              write_upon_reset=False):
        """Start monitoring.

        Args:
            directory (str): A per-training run directory where to record stats.
            video_callable (Optional[function, False]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (for video_callable is None) is to take perfect cubes, capped at 1000. False disables video recording.
            force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
            resume (bool): Retain the training data already in this directory, which will be merged with our new data
            seed (Optional[int]): The seed to run this environment with. By default, a random seed will be chosen.
            write_upon_reset (bool): Write the manifest file on each reset. (This is currently a JSON file, so writing it is somewhat expensive.)
        """
        if self.env.spec is None:
            logger.warn(
                "Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users."
            )

        if not os.path.exists(directory):
            logger.info('Creating monitor directory %s', directory)
            os.makedirs(directory)

        if video_callable is None:
            video_callable = capped_cubic_video_schedule
        elif video_callable == False:
            video_callable = disable_videos
        elif not callable(video_callable):
            raise error.Error(
                'You must provide a function, None, or False for video_callable, not {}: {}'
                .format(type(video_callable), video_callable))

        # Check on whether we need to clear anything
        if force:
            clear_monitor_files(directory)
        elif not resume:
            training_manifests = detect_training_manifests(directory)
            if len(training_manifests) > 0:
                raise error.Error(
                    '''Trying to write to monitor directory {} with existing monitor files: {}.

 You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''
                    .format(directory, ', '.join(training_manifests[:5])))

        self._monitor_id = monitor_closer.register(self)

        self.enabled = True
        self.directory = os.path.abspath(directory)
        # We use the 'openai-gym' prefix to determine if a file is
        # ours
        self.file_prefix = FILE_PREFIX
        self.file_infix = '{}.{}'.format(self._monitor_id, os.getpid())
        self.stats_recorder = stats_recorder.StatsRecorder(
            directory, '{}.episode_batch.{}'.format(self.file_prefix,
                                                    self.file_infix))
        self.configure(video_callable=video_callable)
        if not os.path.exists(directory):
            os.mkdir(directory)
        self.write_upon_reset = write_upon_reset

        seeds = self.env.seed(seed)
        if not isinstance(seeds, list):
            logger.warn(
                'env.seed returned unexpected result: %s (should be a list of ints)',
                seeds)

        self.seeds = seeds
Exemple #9
0
  def step(self, actions):
    """Apply the current player's action and advance the hand.

    Action codes, as listed by the original author:
    CHECK = 0
    CALL = 1
    RAISE = 2
    FOLD = 3
    RAISE_AMT = [0, minraise]

    Args:
      actions: Sequence with one entry per seat, indexed by ``player_id``.
        Only the current player's entry is consumed.

    Returns:
      Whatever ``self._get_current_step_returns`` produces.

    Raises:
      error.Error: If ``actions`` has the wrong length, there is no current
        player, the round counter is already 4, or only one player is still
        in the hand.
    """
    if len(actions) != len(self._seats):
      raise error.Error('actions must be same shape as number of seats.')

    if self._current_player is None:
      raise error.Error('Round cannot be played without 2 or more players.')

    # A round counter of 4 is treated as "hand finished".
    if self._round == 4:
      raise error.Error('Rounds already finished, needs to be reset.')

    # Only seats still playing the hand take part from here on.
    players = [p for p in self._seats if p.playing_hand]
    if len(players) == 1:
      raise error.Error('Round cannot be played with one player.')

    self._last_player = self._current_player
    self._last_actions = actions


    # Refresh every active player's hand rank once more than one community
    # card is present.
    if len(self.community) > 1:
      for player in players:
        player.handrank = self._evaluator.evaluate(player.hand, self.community)

    # The current player acts only if they have not yet played this round
    # and at least one active player is not all-in.
    if not self._current_player.playedthisround and len([p for p in players if not p.isallin]) >= 1:
      # An all-in player cannot act: pass the turn and return early.
      if self._current_player.isallin:
        self._current_player = self._next(players, self._current_player)
        return self._get_current_step_returns(False)

      move = self._current_player.player_move(
          self._output_state(self._current_player), actions[self._current_player.player_id])

      if move[0] == 'call':
        self._player_bet(self._current_player, self._tocall)
        if self._debug:
          print('Player', self._current_player.player_id, move)
        self._current_player = self._next(players, self._current_player)
      elif move[0] == 'check':
        # A check re-bets the player's current bet.
        self._player_bet(self._current_player, self._current_player.currentbet)
        if self._debug:
          print('Player', self._current_player.player_id, move)
        self._current_player = self._next(players, self._current_player)
      elif move[0] == 'raise':
        # The raise amount is added on top of the player's current bet.
        self._player_bet(self._current_player, move[1]+self._current_player.currentbet)
        if self._debug:
          print('Player', self._current_player.player_id, move)
        # After a raise every other active player has to act again.
        for p in players:
          if p != self._current_player:
            p.playedthisround = False
        self._current_player = self._next(players, self._current_player)
      elif move[0] == 'fold':
        self._current_player.playing_hand = False
        folded_player = self._current_player
        if self._debug:
          print('Player', self._current_player.player_id, move)
        # Pick the next player before removing the folder from the list.
        self._current_player = self._next(players, self._current_player)
        players.remove(folded_player)
        self._folded_players.append(folded_player)
        # break if a single player left
        if len(players) == 1:
          self._resolve(players)
    # Once everyone has played this round, resolve it.
    # NOTE(review): after a fold that leaves one player, _resolve may be
    # called a second time here -- confirm that is harmless.
    if all([player.playedthisround for player in players]):
      self._resolve(players)

    terminal = False
    # When every remaining player is all-in, deal out the remaining rounds.
    if all([player.isallin for player in players]):
      while self._round < 4:
        self._deal_next_round()
        self._round += 1
    if self._round == 4 or len(players) == 1:
      terminal = True
      self._resolve_round(players)
    # NOTE(review): this call passes a second argument (the round) while the
    # early return above passes only one -- confirm the helper's signature.
    return self._get_current_step_returns(terminal,self._round)
Exemple #10
0
    def step(self, actions):
        """ Run one timestep of the game (t -> t+1)
            @param: actions the list of actions that player selected.
            ACTIONS_ENUM {
              CHECK = 0,
              CALL = 1,
              RAISE = 2,
              FOLD = 3
            }
            RAISE_AMT = [0, minraise]

            Only the current player's entry of ``actions`` is consumed.
            Returns whatever ``self._get_current_step_returns(terminal)``
            produces. Raises ``error.Error`` when ``actions`` has the wrong
            length, there is no current player, or the round counter is
            already 4.
           """
        if len(actions) != len(self.seats):
            raise error.Error('actions must be same shape as number of seats.')

        if self._current_player is None:
            raise error.Error(
                'Round cannot be played without 2 or more players.')

        # A round counter of 4 is treated as "hand finished".
        if self._round == 4:
            raise error.Error('Rounds already finished, needs to be reset.')

        # Only seats still playing the hand take part from here on.
        players = [p for p in self.seats if p.playing_hand]

        self._last_player = self._current_player
        # _last_actions aliases the caller's sequence (it is written to
        # further down); _last_moved_actions is a shallow copy.
        self._last_actions = actions
        self._last_moved_actions = [a for a in actions]

        oriAction = actions[self._current_player.player_id]
        move = self._current_player.player_move(
            self._output_state(self._current_player), oriAction)
        # Translate the move name back into its action_table constant.
        newAction = (getattr(action_table(), move[0].upper()), move[1])
        # NOTE(review): this compares `move` (name, amount) against
        # `newAction` (constant, amount); a comparison with `oriAction` may
        # have been intended -- confirm.
        if (move != newAction):
            self._last_moved_actions[
                self._current_player.player_id] = newAction

        if move[0] == 'call':
            # A call pays only the difference to the amount owed.
            self._player_bet(self._current_player,
                             self._tocall - self._current_player.currentbet)
            if self._debug:
                print('[DEBUG] Player', self._current_player.player_id, move)
            self._current_player = self._next(players, self._current_player)
        elif move[0] == 'check':
            self._player_bet(self._current_player, 0)
            if self._debug:
                print('[DEBUG] Player', self._current_player.player_id, move)
            self._current_player = self._next(players, self._current_player)
        elif move[0] == 'raise':
            self._player_bet(self._current_player, move[1])
            self._roundBetCount += 1
            if self._debug:
                print('[DEBUG] Player', self._current_player.player_id, move)
            # After a raise every other active player has to act again.
            for p in players:
                if p != self._current_player:
                    p.playedthisround = False
            self._current_player = self._next(players, self._current_player)
        elif move[0] == 'fold':
            self._current_player.playing_hand = False
            folded_player = self._current_player
            if self._debug:
                print('[DEBUG] Player', self._current_player.player_id, move)
            # Pick the next player before removing the folder from the list.
            self._current_player = self._next(players, self._current_player)
            players.remove(folded_player)
            self._folded_players.append(folded_player)

        # For a CALL, replace the recorded amount with the acting player's
        # current bet. The first assignment also mutates the caller's
        # `actions`, since _last_actions is the same object.
        pid = self._last_player.player_id
        if self._last_actions[pid][0] == action_table().CALL:
            self._last_actions[pid] = (self._last_actions[pid][0],
                                       self._last_player.currentbet)
        if self._last_moved_actions[pid][0] == action_table().CALL:
            self._last_moved_actions[pid] = (self._last_moved_actions[pid][0],
                                             self._last_player.currentbet)

        # If nobody can act any more (everyone all-in, the only non-all-in
        # player just acted, or at most one player remains), resolve
        # repeatedly until the round counter reaches 4.
        # NOTE(review): this loop terminates only if _resolve advances
        # self._round -- confirm.
        playersNotAllin = [p for p in players if not p.isallin]
        if (len(playersNotAllin) <
                1) or (len(playersNotAllin) == 1 and playersNotAllin[0]
                       == self._last_player) or (len(players) <= 1):
            while self._round < 4:
                self._resolve(players)
        elif self._current_player.playedthisround:
            self._resolve(players)

        terminal = False
        if self._round == 4:
            terminal = True
            self._resolve_round(players)
            # Point the current player back at whoever acted last, so the
            # final state is reported for that player.
            self._current_player = self._last_player
        return self._get_current_step_returns(terminal)
Exemple #11
0
 def monitor(self):
     """Always raise: ``env.monitor`` is deprecated in favour of ``gym.wrappers.Monitor``.

     Raises:
         error.Error: Unconditionally, with migration instructions.
     """
     deprecation_message = "env.monitor has been deprecated as of 12/23/2016. Remove your call to `env.monitor.start(directory)` and instead wrap your env with `env = gym.wrappers.Monitor(env, directory)` to record data."
     raise error.Error(deprecation_message)
Exemple #12
0
 def __init__(self):
     """Refuse direct construction and point the caller at ``gym.make``.

     Raises:
         error.Error: Unconditionally.
     """
     message_lines = (
         "Error initializing BipedalWalkerHardcore Environment.\n",
         "Currently, we do not support initializing this mode of environment by calling the class directly.\n",
         "To use this environment, instead create it by specifying the hardcore keyword in gym.make, i.e.\n",
         'gym.make("BipedalWalker-v3", hardcore=True)',
     )
     raise error.Error("".join(message_lines))
    def __init__(self,
                 monitor,
                 frameskip=(2, 5),
                 repeat_action_probability=0.):
        """Build a goal-conditioned Atari environment around an ALE instance.

        Args:
            monitor: Object providing ``game_name``, ``goals_set_small`` and
                ``goals_center``; it is also stored on the instance.
            frameskip: Stored as ``self.frameskip``.
            repeat_action_probability: Float or int forwarded to ALE's
                'repeat_action_probability' setting.

        Raises:
            IOError: If the ROM path for ``monitor.game_name`` does not exist.
            AssertionError: If ``repeat_action_probability`` is not a number.
            error.Error: If the observation type is unrecognized.
        """
        self.game_path = atari_py.get_game_path(monitor.game_name)

        rom_present = os.path.exists(self.game_path)
        if not rom_present:
            raise IOError('You asked for game %s but path %s does not exist' %
                          (monitor.game_name, self.game_path))

        self._obs_type = 'image'  # HACK: observation type is pinned to image.
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None
        # Keep the monitor around so it can be consulted later.
        self.monitor = monitor

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        setting_key = 'repeat_action_probability'.encode('utf-8')
        self.ale.setFloat(setting_key, repeat_action_probability)

        self.seed_and_load_rom()

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        # Goal bookkeeping.
        self._goals_set = monitor.goals_set_small  # 84x84
        self._goals_center = monitor.goals_center
        self.goals_space = spaces.Discrete(len(self._goals_set))
        # Goal the agent is asked to reach; -1 until one is set.
        self.desired_goal = -1
        # Goal the agent has most recently reached; -1 until one is reached.
        self.achieved_goal = -1
        # Goals reached on the way to the current achieved_goal.
        self.goals_history = set()

        # Goal detection needs the agent location.
        # HACK: only montezuma_revenge is handled right now.
        if monitor.game_name == 'montezuma_revenge':
            origin_x, origin_y = 42, 33
            self.agent_origin = [origin_x, origin_y]
            self.agent_last_x = origin_x
            self.agent_last_y = origin_y

        (screen_width, screen_height) = self.ale.getScreenDims()

        self.init_screen = self.ale.getScreenGrayscale()

        # 'ram' and 'image' share the same observation-space layout here.
        if self._obs_type not in ('ram', 'image'):
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))

        frame_space = spaces.Box(low=0,
                                 high=255,
                                 shape=(screen_height, screen_width, 3),
                                 dtype=np.uint8)
        self.observation_space = spaces.Dict({
            'observation': frame_space,
            'achieved_goal': spaces.Discrete(1),
            'desired_goal': spaces.Discrete(1)
        })
Exemple #14
0
def _upload_benchmark(training_dir, algorithm_id, benchmark_id,
                      benchmark_run_tags, api_key, ignore_open_monitors,
                      skip_videos):
    """Upload all evaluations found under ``training_dir`` as one benchmark run.

    The directory tree is walked for training manifests, the discovered
    env ids are compared against the benchmark spec (a mismatch is only
    logged), a ``BenchmarkRun`` is created, and every discovered directory
    is then uploaded against that run.

    Returns:
        The id of the created benchmark run.

    Raises:
        error.Error: if ``benchmark_id`` is not registered, or if no
            evaluations are found under ``training_dir``.
    """
    # Gather every directory that contains training manifests, together
    # with the env id recorded in those manifests.
    evaluation_dirs = []
    found_env_ids = []
    for dirpath, _, filenames in os.walk(training_dir):
        manifests = monitoring.detect_training_manifests(dirpath,
                                                         files=filenames)
        if not manifests:
            continue
        env_info = monitoring.load_env_info_from_manifests(
            manifests, training_dir)
        found_env_ids.append(env_info['env_id'])
        evaluation_dirs.append(dirpath)

    # The benchmark must be known before anything is uploaded.
    try:
        spec = benchmark_spec(benchmark_id)
    except error.UnregisteredBenchmark:
        raise error.Error(
            "Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?"
            .format(benchmark_id))

    # One expected env id per trial of every task in the spec.
    expected_env_ids = []
    for task in spec.tasks:
        expected_env_ids.extend([task.env_id] * task.trials)

    if not found_env_ids:
        raise error.Error(
            "Could not find any evaluations in {}".format(training_dir))

    # A mismatch with the spec is reported but does not abort the upload.
    found_sorted = sorted(found_env_ids)
    expected_sorted = sorted(expected_env_ids)
    if found_sorted != expected_sorted:
        logger.info(
            "WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s",
            benchmark_id, training_dir, found_sorted, expected_sorted)

    create_run = util.retry_exponential_backoff(
        resource.BenchmarkRun.create,
        (error.APIConnectionError, ),
        max_retries=5,
        interval=3,
    )
    benchmark_run = create_run(benchmark_id=benchmark_id,
                               algorithm_id=algorithm_id,
                               tags=json.dumps(benchmark_run_tags))
    benchmark_run_id = benchmark_run.id

    # Upload each evaluation. algorithm_id is deliberately not propagated
    # to the individual evaluations when they belong to a benchmark run.
    for evaluation_dir in evaluation_dirs:
        upload_one = util.retry_exponential_backoff(
            _upload,
            (error.APIConnectionError, ),
            max_retries=5,
            interval=3,
        )
        upload_one(evaluation_dir, None, None, benchmark_run_id, api_key,
                   ignore_open_monitors, skip_videos)

    logger.info(
        """
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
    """.rstrip(), benchmark_id, benchmark_run.web_url())

    return benchmark_run_id
Exemple #15
0
    def step(self, actions):
        """Play one action for the player whose turn it is (t -> t+1).

        Args:
            actions: one entry per seat. Only the entry at the current
                player's ``player_id`` is consumed in this step.
                ACTIONS_ENUM { CHECK = 0, CALL = 1, RAISE = 2, FOLD = 3 }
                RAISE_AMT = [0, minraise]

        Returns:
            The value of ``self._get_current_step_returns(terminal)``.

        Raises:
            error.Error: if ``actions`` does not have one entry per seat,
                if there is no current player, or if the round counter
                is already 4.
        """
        if len(actions) != len(self._seats):
            raise error.Error('actions must be same shape as number of seats.')

        if self._current_player is None:
            raise error.Error(
                'Round cannot be played without 2 or more players.')

        if self._round == 4:
            raise error.Error(
                'Rounds already finished, needs to go new_cycle.')

        in_hand = [seat for seat in self._seats if seat.playing_hand]
        if len(in_hand) == 1:
            # Only one player is still in the hand: settle immediately.
            self._resolve_round(in_hand)
            return self._get_current_step_returns(True)

        self._last_player = self._current_player
        self._last_actions = actions

        move = self._current_player.player_move(
            self._output_state(self._current_player),
            actions[self._current_player.player_id])

        def report_move():
            if self._debug:
                print('[DEBUG] Player', self._current_player.player_id, move)

        verb = move[0]
        folded_player = None
        if verb in ('call', 'check'):
            # Both verbs put in the amount needed to stay in the hand.
            self._player_bet(self._current_player, self._tocall)
            report_move()
            if verb == 'check' and self.round_holder == -1:
                self.round_holder = self._current_player.player_id
        elif verb == 'raise':
            self._player_bet(self._current_player, move[1] + self._tocall)
            report_move()
            # The raiser becomes the player the betting has to come back to.
            self.round_holder = self._current_player.player_id
        elif verb == 'fold':
            self._current_player.playing_hand = False
            folded_player = self._current_player
            report_move()

        # The next player is picked from the list that still includes a
        # player who just folded; the folder is removed afterwards.
        self._current_player = self._next(in_hand, self._current_player)
        if folded_player:
            in_hand.remove(folded_player)
            self._folded_players.append(folded_player)

        if len(in_hand) == 1:
            self.logger.info("1 player left. Jump to next round.")
            self._resolve(in_hand)
        if self._current_player.player_id == self.round_holder:
            self.logger.info(
                "All played. Jump to next round. current_player = {}".format(
                    self._current_player.player_id))
            self._resolve(in_hand)

        if self._round == 4:
            self._resolve_round(in_hand)
            return self._get_current_step_returns(True)
        return self._get_current_step_returns(False)
Exemple #16
0
    def __init__(self,
                 game='pong',
                 mode=None,
                 difficulty=None,
                 obs_type='ram',
                 frameskip=(2, 5),
                 repeat_action_probability=0.,
                 full_action_space=False,
                 render_screen=False,
                 screen_scale=None):
        """Set up the environment on top of a headless ``EnvPong`` emulator.

        Args:
            game: game name, resolved to a path via ``atari_py.get_game_path``.
            mode: stored as ``self.game_mode``.
            difficulty: stored as ``self.game_difficulty``.
            obs_type: ``'ram'`` or ``'image'``; selects the observation space.
            frameskip: either a tuple (a random range to choose from, with
                the top value excluded) or an int.
            repeat_action_probability: forwarded to the emulator's
                ``repeat_action_probability`` setting.
            full_action_space: use the legal action set instead of the
                minimal action set.
            render_screen: forwarded to ``EnvPong``.
            screen_scale: forwarded to ``EnvPong``.

        Raises:
            IOError: if the resolved game path does not exist.
            error.Error: if ``obs_type`` is unrecognized (only reachable
                when assertions are disabled).
        """
        # EzPickle re-creates the object from the arguments recorded here,
        # so every constructor argument has to be passed, in signature
        # order. The last three were previously omitted, which made
        # pickled copies silently fall back to their default values.
        utils.EzPickle.__init__(self, game, mode, difficulty, obs_type,
                                frameskip, repeat_action_probability,
                                full_action_space, render_screen,
                                screen_scale)
        assert obs_type in ('ram', 'image')

        self.game = game
        self.game_path = atari_py.get_game_path(game)
        self.game_mode = mode
        self.game_difficulty = difficulty

        if not os.path.exists(self.game_path):
            msg = 'You asked for game %s but path %s does not exist'
            raise IOError(msg % (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        # EnvPong is used here in place of atari_py.ALEInterface().
        self.ale = EnvPongDraft_Surface_Headless.EnvPong(
            render_screen=render_screen, screen_scale=screen_scale)
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), \
                "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self.seed()

        self._action_set = (self.ale.getLegalActionSet() if full_action_space
                            else self.ale.getMinimalActionSet())
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                dtype=np.uint8,
                                                shape=(128, ))
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))
Exemple #17
0
 def configure(self, *args, **kwargs):
     """Always raise: ``Env.configure`` is no longer available.

     All arguments are accepted and ignored.
     """
     removal_notice = "Env.configure has been removed in gym v0.8.0, released on 2017/03/05. If you need Env.configure, please use gym version 0.7.x from pip, or checkout the `gym:v0.7.4` tag from git."
     raise error.Error(removal_notice)
Exemple #18
0
    def __init__(self, model_paths, frame_skip, observation_size, action_bounds,
                 dt=0.002, obs_type="parameter", action_type="continuous", visualize=True, disableViewer=False,
                 screen_width=80, screen_height=45):
        """Build the DART world, the robot skeleton and the gym spaces.

        Args:
            model_paths: a model file name or a list of them. Names not
                starting with "/" are looked up in the ``assets``
                directory next to this module.
            frame_skip: stored as ``self.frame_skip``.
            observation_size: length of the observation vector used for
                the ``'parameter'`` observation type.
            action_bounds: pair of sequences; ``action_bounds[1]`` is
                passed as the lower and ``action_bounds[0]`` as the upper
                bound of a continuous action space.
            dt: time step handed to ``pydart.World``.
            obs_type: ``'parameter'`` or ``'image'``.
            action_type: ``'continuous'`` or ``'discrete'``. For discrete
                envs the subclass must define ``action_space`` itself.
            visualize: whether to show the window.
            disableViewer: stored as ``self.disableViewer``.
            screen_width: stored and used for the image observation shape.
            screen_height: stored and used for the image observation shape.

        Raises:
            ValueError: if ``model_paths`` is empty.
            IOError: if a model file does not exist.
            error.Error: if ``obs_type`` is unrecognized (only reachable
                when assertions are disabled).
        """
        assert obs_type in ('parameter', 'image')
        assert action_type in ("continuous", "discrete")
        print('pydart initialization OK')

        self.viewer = None

        if len(model_paths) < 1:
            # StandardError only exists on Python 2 (a NameError on
            # Python 3). ValueError exists on both and is a subclass of
            # StandardError on Python 2, so existing handlers still match.
            raise ValueError("At least one model file is needed.")

        if isinstance(model_paths, str):
            model_paths = [model_paths]

        # convert everything to fullpath
        full_paths = []
        for model_path in model_paths:
            if model_path.startswith("/"):
                fullpath = model_path
            else:
                fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                        model_path)
            if not path.exists(fullpath):
                raise IOError("File %s does not exist" % fullpath)
            full_paths.append(fullpath)

        # A .skel file describes a whole world; any other kind of file is
        # added to an empty world one skeleton at a time.
        if full_paths[0].endswith('.skel'):
            self.dart_world = pydart.World(dt, full_paths[0])
        else:
            self.dart_world = pydart.World(dt)
            for fullpath in full_paths:
                self.dart_world.add_skeleton(fullpath)

        # assume that the skeleton of interest is always the last one
        self.robot_skeleton = self.dart_world.skeletons[-1]

        # Enforce position limits on every joint that declares one.
        for jt in range(0, len(self.robot_skeleton.joints)):
            for dof in range(len(self.robot_skeleton.joints[jt].dofs)):
                if self.robot_skeleton.joints[jt].has_position_limit(dof):
                    self.robot_skeleton.joints[jt].set_position_limit_enforced(
                        True)

        self._obs_type = obs_type
        self.frame_skip = frame_skip
        self.visualize = visualize  # Show the window or not
        self.disableViewer = disableViewer

        # random perturbation
        self.add_perturbation = False
        # probability, magnitude, bodyid (the duration is kept separately
        # in perturbation_duration below)
        self.perturbation_parameters = [0.05, 5, 2]
        self.perturbation_duration = 40
        self.perturb_force = np.array([0, 0, 0])

        self.obs_dim = observation_size
        self.act_dim = len(action_bounds[0])

        # for discrete instances, action_space should be defined in the subclass
        if action_type == "continuous":
            self.action_space = spaces.Box(action_bounds[1], action_bounds[0])

        self.track_skeleton_id = -1  # track the last skeleton's com by default

        # The viewer is created here rather than in _render so that the
        # window size is already known for image learning.
        self.screen_width = screen_width
        self.screen_height = screen_height
        self._get_viewer()
        # Give different observation space for different kind of envs
        if self._obs_type == 'parameter':
            high = np.inf * np.ones(self.obs_dim)
            low = -high
            self.observation_space = spaces.Box(low, high)
        elif self._obs_type == 'image':
            # Change to grayscale image later
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.screen_width,
                                                       self.screen_height))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))

        self._seed()

        # NOTE(review): self.dt is not assigned in this constructor; it is
        # presumably provided elsewhere on the class - confirm.
        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
Exemple #19
0
 def type(self, type):
     """Set the episode type.

     Args:
         type: ``'t'`` for training or ``'e'`` for evaluation.

     Raises:
         error.Error: if ``type`` is any other value.
     """
     if type not in ('t', 'e'):
         # The value has to be interpolated with .format(); passing it as
         # a second exception argument left the "{}" placeholder in the
         # message unfilled.
         raise error.Error(
             'Invalid episode type {}: must be t for training or e for evaluation'
             .format(type))
     self._type = type
Exemple #20
0
 def register(self, id, **kwargs):
     """Add an ``EnvSpec`` for ``id`` to ``gym.envs.registry``.

     The spec is stored on the global gym registry, not on ``self``.

     Raises:
         error.Error: if ``id`` is already registered there.
     """
     global_registry = gym.envs.registry
     already_known = id in global_registry.env_specs
     if already_known:
         raise error.Error('Cannot re-register id: {}'.format(id))
     global_registry.env_specs[id] = EnvSpec(id, **kwargs)
Exemple #21
0
    def _load_level(self):
        """(Re)configure and initialise the Doom game for ``self.level``.

        An already initialised game is closed and replaced first. If the
        instance provides a callable ``_customize_game`` it is used to
        configure the game (and ``self.level`` is set to -1); otherwise
        the configuration comes from ``DOOM_SETTINGS[self.level]``.

        Returns:
            In non-human mode, a copy of the image buffer of the first
            game state. In human mode, an all-zero uint8 array of the
            observation-space shape, returned after ``_play_human_mode``.

        Raises:
            error.Error: in non-human mode, if VizDoom exits unexpectedly
                during ``init()``.
        """
        # Closing the level if it is initialized
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = DoomGame()

        # Customizing level
        if getattr(self, '_customize_game', None) is not None and callable(self._customize_game):
            self.level = -1
            self._customize_game()

        else:
            # Loading Paths
            if not self.is_initialized:
                self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
                self.game.set_doom_game_path(self.loader.get_freedoom_path())

            # Common Settings
            self.game.load_config(os.path.join(self.doom_dir, 'assets/%s' % DOOM_SETTINGS[self.level][CONFIG]))
            self.game.set_doom_scenario_path(self.loader.get_scenario_path(DOOM_SETTINGS[self.level][SCENARIO]))

            # Random Map Selection
            if DOOM_SETTINGS[self.level][MAP] != '':
                if RANDOMIZE_MAPS > 0 and 'labyrinth' in DOOM_SETTINGS[self.level][CONFIG].lower():
                    if 'fix' in DOOM_SETTINGS[self.level][SCENARIO].lower():
                        # "fix" scenarios draw from maps 4..7 only
                        mapId = 'map%02d' % np.random.randint(4, 8)
                    else:
                        mapId = 'map%02d' % np.random.randint(1, RANDOMIZE_MAPS + 1)
                    print('\t=> Special Config: Randomly Loading Maps. MapID = ' + mapId)
                    self.game.set_doom_map(mapId)
                else:
                    print('\t=> Default map loaded. MapID = ' + DOOM_SETTINGS[self.level][MAP])
                    self.game.set_doom_map(DOOM_SETTINGS[self.level][MAP])

            # Setting Vizdoom map settings
            self.game.set_doom_skill(DOOM_SETTINGS[self.level][DIFFICULTY])
            self.allowed_actions = DOOM_SETTINGS[self.level][ACTIONS]
            self.game.set_screen_resolution(self.screen_resolution)

        self.previous_level = self.level
        self._closed = False

        # Monster removal applies to both modes, so it is done once here
        # instead of being repeated in each branch below.
        if NO_MONSTERS:
            print('\t=> Special Config: Monsters Removed.')
            self.game.add_game_args('-nomonsters 1')

        # Algo mode
        if 'human' != self._mode:
            self.game.set_window_visible(False)
            self.game.set_mode(Mode.PLAYER)
            self.no_render = False
            try:
                with self.lock:
                    self.game.init()
            except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
                raise error.Error(
                    'VizDoom exited unexpectedly. This is likely caused by a missing multiprocessing lock. ' +
                    'To run VizDoom across multiple processes, you need to pass a lock when you configure the env ' +
                    '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env ' +
                    'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a ' +
                    'singleton lock in memory.')
            self._start_episode()
            self.is_initialized = True
            return self.game.get_state().image_buffer.copy()

        # Human mode
        else:
            self.game.add_game_args('+freelook 1')
            self.game.set_window_visible(True)
            self.game.set_mode(Mode.SPECTATOR)
            self.no_render = True
            with self.lock:
                self.game.init()
            self._start_episode()
            self.is_initialized = True
            self._play_human_mode()
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
Exemple #22
0
 def env(self):
     """Return the object produced by calling ``self._env_ref()``.

     Raises:
         error.Error: if the reference yields ``None``.
     """
     referenced = self._env_ref()
     if referenced is not None:
         return referenced
     raise error.Error("env has been garbage collected. To keep using a monitor, you must keep around a reference to the env object. (HINT: try assigning the env to a variable in your code.)")
Exemple #23
0
    def _sample_goal(self, goal_side=None):
        """Draw a random goal: a position followed by a normalised quaternion.

        :param goal_side: which hand (1 or 2) the target is near. If None,
            the side whose target region is furthest from the object is
            chosen automatically. Ignored when ``self.two_objects`` is set.
        :return: the concatenated position and quaternion, or, when
            ``self.two_objects`` is set, a dict with one such array under
            each of "object_1" and "object_2".
        :raises error.Error: if ``self.target_rotation`` is not one of
            'z', 'xyz' or 'ignore'.
        """

        def draw_position(region):
            # Column 0 holds the lower bounds, column 1 the upper bounds.
            return self.np_random.uniform(region[:, 0], region[:, 1])

        def farther_from_region_1(object_pos):
            centre_1 = np.mean(self.target_position_1, axis=-1)
            centre_2 = np.mean(self.target_position_2, axis=-1)
            gap_1 = np.linalg.norm(object_pos - centre_1)
            gap_2 = np.linalg.norm(object_pos - centre_2)
            return gap_1 > gap_2

        if self.two_objects:
            achieved = self._get_achieved_goal()
            # Only the first object decides which region goes to which goal.
            if farther_from_region_1(achieved["object_1"][:3]):
                first_region = self.target_position_1
                second_region = self.target_position_2
            else:
                first_region = self.target_position_2
                second_region = self.target_position_1
            target_pos_1 = draw_position(first_region)
            target_pos_2 = draw_position(second_region)
        else:
            if goal_side is None:
                achieved = self._get_achieved_goal()
                goal_side = 1 if farther_from_region_1(achieved[:3]) else 2
            if goal_side == 1:
                target_pos_1 = draw_position(self.target_position_1)
            else:
                target_pos_1 = draw_position(self.target_position_2)

        rotation_mode = self.target_rotation
        if rotation_mode == 'z':
            z_axis = np.array([0.0, 0.0, 1.0])

            def draw_quat():
                spin = self.np_random.uniform(-np.pi, np.pi)
                return quat_from_angle_and_axis(spin, z_axis)
        elif rotation_mode == 'xyz':

            def draw_quat():
                spin = self.np_random.uniform(-np.pi, np.pi)
                direction = self.np_random.uniform(-1.0, 1.0, size=3)
                return quat_from_angle_and_axis(spin, direction)
        elif rotation_mode == 'ignore':

            def draw_quat():
                return np.zeros((4, )) + 0.1
        else:
            raise error.Error('Unknown target_rotation option "{}".'.format(
                self.target_rotation))

        # Both quaternions are drawn before either one is normalised.
        target_quat_1 = draw_quat()
        if self.two_objects:
            target_quat_2 = draw_quat()

        target_quat_1 /= np.linalg.norm(target_quat_1)
        if not self.two_objects:
            return np.concatenate([target_pos_1, target_quat_1])

        target_quat_2 /= np.linalg.norm(target_quat_2)
        return {
            "object_1": np.concatenate([target_pos_1, target_quat_1]),
            "object_2": np.concatenate([target_pos_2, target_quat_2])
        }
Exemple #24
0
    def __init__(
        self,
        game="pong",
        mode=None,
        difficulty=None,
        obs_type="image",
        frameskip=(2, 5),
        repeat_action_probability=0.0,
        full_action_space=False,
    ):
        """Set up the Atari environment on top of ``atari_py.ALEInterface``.

        Arguments:
            game: the name of the game ("pong", "Enduro", etc) dont add the "-v0"
            mode: different modes are available for different games.
            difficulty: stored as ``self.game_difficulty``.
            obs_type: "ram" or "image"; selects the observation space.
            frameskip: either a tuple (indicating a random range to choose
                from, with the top value excluded), or an int.
            repeat_action_probability: forwarded to ALE's
                "repeat_action_probability" setting.
            full_action_space: use the legal action set instead of the
                minimal action set.

        Raises:
            IOError: if the resolved game path does not exist.
            error.Error: if ``obs_type`` is unrecognized (only reachable
                when assertions are disabled).
        """
        # EzPickle re-creates the object from the arguments recorded here,
        # so every constructor argument has to be passed, in signature
        # order. full_action_space was previously omitted, which made
        # pickled copies silently fall back to the minimal action set.
        utils.EzPickle.__init__(self, game, mode, difficulty, obs_type,
                                frameskip, repeat_action_probability,
                                full_action_space)
        assert obs_type in ("ram", "image")

        self.game = game
        self.game_path = atari_py.get_game_path(game)
        self.game_mode = mode
        self.game_difficulty = difficulty

        if not os.path.exists(self.game_path):
            msg = "You asked for game %s but path %s does not exist"
            raise IOError(msg % (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat("repeat_action_probability".encode("utf-8"),
                          repeat_action_probability)

        self.seed()

        self._action_set = (self.ale.getLegalActionSet() if full_action_space
                            else self.ale.getMinimalActionSet())
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == "ram":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                dtype=np.uint8,
                                                shape=(128, ))
        elif self._obs_type == "image":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error("Unrecognized observation type: {}".format(
                self._obs_type))
Exemple #25
0
def write_archive(videos, archive_file, env_id=None):
    """Write the given videos, their metadata and a manifest to a gzipped tar.

    Args:
        videos: iterable of ``(video_path, metadata_path)`` pairs.
        archive_file: writable file object that receives the archive.
        env_id: only used as a prefix in log and error messages.

    Raises:
        error.Error: if there are more than ``MAX_VIDEOS`` videos, if a
            video or metadata file is missing, if a base name occurs
            twice, or if a base name does not match the expected pattern.
    """
    if len(videos) > MAX_VIDEOS:
        # The message reads "upload {count} videos ... limit of {limit}",
        # so the count has to come before the limit.
        raise error.Error(
            '[{}] Trying to upload {} videos, but there is a limit of {} currently. If you actually want to upload this many videos, please email [email protected] with your use-case.'
            .format(env_id, len(videos), MAX_VIDEOS))

    logger.debug('[%s] Preparing an archive of %d videos: %s', env_id,
                 len(videos), videos)

    # Double check that there are no collisions
    basenames = set()
    manifest = {'version': 0, 'videos': []}

    with tarfile.open(fileobj=archive_file, mode='w:gz') as tar:
        for video_path, metadata_path in videos:
            video_name = os.path.basename(video_path)
            metadata_name = os.path.basename(metadata_path)

            if not os.path.exists(video_path):
                raise error.Error(
                    '[{}] No such video file {}. (HINT: Your video recorder may have broken midway through the run. You can check this with `video_recorder.functional`.)'
                    .format(env_id, video_path))
            elif not os.path.exists(metadata_path):
                # Report the metadata path, which is the file that is
                # actually missing in this branch.
                raise error.Error(
                    '[{}] No such metadata file {}. (HINT: this should be automatically created when using a VideoRecorder instance.)'
                    .format(env_id, metadata_path))

            # Do some sanity checking
            if video_name in basenames:
                raise error.Error(
                    '[{}] Duplicated video name {} in video list: {}'.format(
                        env_id, video_name, videos))
            elif metadata_name in basenames:
                raise error.Error(
                    '[{}] Duplicated metadata file name {} in video list: {}'.
                    format(env_id, metadata_name, videos))
            elif not video_name_re.search(video_name):
                raise error.Error(
                    '[{}] Invalid video name {} (must match {})'.format(
                        env_id, video_name, video_name_re.pattern))
            elif not metadata_name_re.search(metadata_name):
                raise error.Error(
                    '[{}] Invalid metadata file name {} (must match {})'.
                    format(env_id, metadata_name, metadata_name_re.pattern))

            # Record that we've seen these names; add to manifest
            basenames.add(video_name)
            basenames.add(metadata_name)
            manifest['videos'].append((video_name, metadata_name))

            # Import the files into the archive
            tar.add(video_path, arcname=video_name, recursive=False)
            tar.add(metadata_path, arcname=metadata_name, recursive=False)

        # The manifest is written to a temporary file first because
        # tar.add() takes a path. The file is closed before it is added
        # so its contents are flushed to disk.
        f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
        try:
            json.dump(manifest, f)
            f.close()
            tar.add(f.name, arcname='manifest.json')
        finally:
            # Closing again is harmless and covers a failure in json.dump.
            f.close()
            os.remove(f.name)
Exemple #26
0
    def __init__(self, scenario, variant, obs_type='image', frameskip=4):
        """Configure a ViZDoom game for the given scenario variant.

        Args:
            scenario: key into ``self.scenarios``.
            variant: key into ``self.scenarios[scenario]``; its settings
                are layered on top of that scenario's 'baseline' entry.
            obs_type: only 'image' is accepted.
            frameskip: stored as ``self._frameskip``.

        Raises:
            error.Error: for an unknown scenario, variant or obs_type.
        """
        if scenario not in self.scenarios:
            raise error.Error("Unsupported scenario: {}".format(scenario))

        if variant not in self.scenarios[scenario]:
            raise error.Error(
                "Unsupported scenario variant: {}".format(variant))

        # Variant settings override the scenario's baseline settings.
        settings = dict(self.scenarios[scenario]['baseline'])
        settings.update(self.scenarios[scenario][variant])
        self._config = settings

        game = vizdoom.DoomGame()
        self._vizdoom = game
        game.set_doom_scenario_path(
            os.path.join(ASSET_PATH, settings['scenario']))
        game.set_doom_map(settings.get('map', 'MAP01'))
        game.set_screen_resolution(vizdoom.ScreenResolution.RES_640X480)
        game.set_screen_format(vizdoom.ScreenFormat.BGR24)
        game.set_mode(vizdoom.Mode.PLAYER)

        self._width = 640
        self._height = 480
        self._depth = 3

        # Switch off every rendering extra, the window and the sound.
        for switch_name in ('set_render_hud',
                            'set_render_minimal_hud',
                            'set_render_crosshair',
                            'set_render_weapon',
                            'set_render_decals',
                            'set_render_particles',
                            'set_render_effects_sprites',
                            'set_render_messages',
                            'set_render_corpses',
                            'set_window_visible',
                            'set_sound_enabled'):
            getattr(game, switch_name)(False)

        # Rewards.
        game.set_living_reward(settings.get('living_reward', 1))
        game.set_death_penalty(settings.get('death_penalty', 100))

        # Duration.
        game.set_episode_timeout(settings.get('episode_timeout', 2100))

        # Every button becomes available to the game.
        for button in self.buttons:
            game.add_available_button(button)

        # One discrete action per on/off combination of the buttons,
        # leaving out combinations that press two opposite buttons.
        self._action_button_map = []
        for pressed in itertools.product([False, True],
                                         repeat=len(self.buttons)):
            conflicting = any(
                pressed[self.buttons.index(first)]
                and pressed[self.buttons.index(second)]
                for first, second in self.opposite_button_pairs)
            if not conflicting:
                self._action_button_map.append(list(pressed))

        self.action_space = spaces.Discrete(len(self._action_button_map))

        if obs_type != 'image':
            raise error.Error(
                "Unrecognized observation type: {}".format(obs_type))
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self._height,
                                                   self._width,
                                                   self._depth))

        self._scenario = scenario
        self._variant = variant
        self._obs_type = obs_type
        self._frameskip = frameskip
        self._initialized = False
        self._temporary_scenario = None
        self._seed()
Exemple #27
0
    def _reset_sim(self):
        """Reset the simulation and place the object in a randomised pose.

        The simulator is restored to ``self.initial_state``, the pose of
        'object:joint' is optionally perturbed in rotation and position,
        and the simulation is then stepped ten times with a zero action.

        Returns:
            False if stepping raises ``mujoco_py.MujocoException``;
            otherwise whether the z coordinate of the 'object:center'
            site is above 0.04.

        Raises:
            error.Error: if rotation randomisation is enabled and
                ``self.target_rotation`` is not a known option.
        """
        self.sim.set_state(self.initial_state)
        self.sim.forward()

        start_pose = self.sim.data.get_joint_qpos('object:joint').copy()
        initial_pos = start_pose[:3]
        initial_quat = start_pose[3:]
        assert start_pose.shape == (7, )
        assert initial_pos.shape == (3, )
        assert initial_quat.shape == (4, )
        start_pose = None

        # Pick an offset rotation according to the configured mode.
        if self.randomize_initial_rotation:
            rotation_mode = self.target_rotation
            if rotation_mode == 'z':
                spin = self.np_random.uniform(-np.pi, np.pi)
                offset_quat = quat_from_angle_and_axis(
                    spin, np.array([0., 0., 1.]))
            elif rotation_mode == 'parallel':
                spin = self.np_random.uniform(-np.pi, np.pi)
                about_z = quat_from_angle_and_axis(
                    spin, np.array([0., 0., 1.]))
                choice = self.np_random.randint(len(self.parallel_quats))
                offset_quat = rotations.quat_mul(about_z,
                                                 self.parallel_quats[choice])
            elif rotation_mode in ['xyz', 'ignore']:
                spin = self.np_random.uniform(-np.pi, np.pi)
                direction = self.np_random.uniform(-1., 1., size=3)
                offset_quat = quat_from_angle_and_axis(spin, direction)
            elif rotation_mode == 'fixed':
                offset_quat = None
            else:
                raise error.Error(
                    'Unknown target_rotation option "{}".'.format(
                        self.target_rotation))

            if offset_quat is not None:
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)

        # Jitter the position unless it is configured to stay fixed.
        if self.randomize_initial_position and self.target_position != 'fixed':
            initial_pos += self.np_random.normal(size=3, scale=0.005)

        initial_quat /= np.linalg.norm(initial_quat)
        self.sim.data.set_joint_qpos(
            'object:joint', np.concatenate([initial_pos, initial_quat]))

        def object_above_threshold():
            self.sim.forward()
            centre_site = self.sim.model.site_name2id('object:center')
            centre_pos = self.sim.data.site_xpos[centre_site]
            return (centre_pos[2] > 0.04)

        # Run the simulation for a bunch of timesteps to let everything settle in.
        for _ in range(10):
            self._set_action(np.zeros(20))
            try:
                self.sim.step()
            except mujoco_py.MujocoException:
                return False
        return object_above_threshold()
Exemple #28
0
    def setup_camera_pc(self):
        """Load the scene's views and build the point-cloud RGB renderer.

        Looks up ``self.model_id`` among ``self.dataset.scenes``, fetches
        every view of that scene, and stores a ``PCRenderer`` built from
        them on ``self.r_camera_rgb``. When ``self._require_rgb`` is set,
        each view has its top and bottom rows overwritten with a nearby row
        and, if ``self.scale_up != 1``, is resized by ``1 / self.scale_up``
        before being used as a source image/depth pair.

        Raises:
            AssertionError: If ``self._require_camera_input`` is falsy.
            error.Error: If ``self.model_id`` is not among the dataset's
                scenes, or if the dataset returns no views for the scene.
        """
        ## Camera specific
        assert (self._require_camera_input)
        if self.scene_type == "building":
            self.dataset = ViewDataSet3D(transform=np.array,
                                         mist_transform=np.array,
                                         seqlen=2,
                                         off_3d=False,
                                         train=False,
                                         overwrite_fofn=True,
                                         env=self,
                                         only_load=self.config["model_id"])
        # NOTE(review): for any other scene type ``self.dataset`` must have
        # been set elsewhere -- confirm against callers.

        scene_dict = {
            scene: idx
            for idx, scene in enumerate(self.dataset.scenes)
        }
        ## Todo: (hzyjerry) more error handling
        if self.model_id not in scene_dict:
            raise error.Error(
                "Dataset not found: model {} cannot be loaded".format(
                    self.model_id))
        scene_id = scene_dict[self.model_id]
        uuids, rts = self.dataset.get_scene_info(scene_id)

        sources, source_depths = [], []
        source_semantics = []

        def _downscale(image):
            # Shrink by the render scale-up factor with bicubic filtering.
            factor = 1.0 / self.scale_up
            return cv2.resize(image,
                              None,
                              fx=factor,
                              fy=factor,
                              interpolation=cv2.INTER_CUBIC)

        # Views are loaded the same way whether or not multiprocessing is
        # enabled, so a single loop replaces the two identical branches.
        all_data = self.dataset.get_multi_index([v for _, v in uuids])
        target = None
        have_views = False
        for data in all_data:
            have_views = True
            target, target_depth = data[1], data[3]
            if not self._require_rgb:
                continue

            # Overwrite the top/bottom ``ww`` rows with a single nearby row.
            ww = target.shape[0] // 8 + 2
            target[:ww, :, :] = target[ww, :, :]
            target[-ww:, :, :] = target[-ww, :, :]

            if self.scale_up != 1:
                target = _downscale(target)
                target_depth = _downscale(target_depth)

            sources.append(target)
            source_depths.append(target_depth)

        # Without any view there is no ``target`` to hand to the renderer;
        # fail with a clear message instead of an unbound-variable error.
        if not have_views:
            raise error.Error(
                "Dataset returned no views for model {}".format(
                    self.model_id))

        # The renderer receives the last view processed as its target.
        self.r_camera_rgb = PCRenderer(self.port_rgb,
                                       sources,
                                       source_depths,
                                       target,
                                       rts,
                                       scale_up=self.scale_up,
                                       semantics=source_semantics,
                                       gui=self.gui,
                                       use_filler=self._use_filler,
                                       gpu_idx=self.gpu_idx,
                                       windowsz=self.windowsz,
                                       env=self)
Exemple #29
0
    def _start(
        self,
        directory,
        video_callable=None,
        force=False,
        resume=False,
        write_upon_reset=False,
        uid=None,
        mode=None,
    ):
        """Start monitoring.

        Args:
            directory (str): A per-training run directory where to record stats.
            video_callable (Optional[function, False]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (for video_callable is None) is to take perfect cubes, capped at 1000. False disables video recording.
            force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
            resume (bool): Retain the training data already in this directory, which will be merged with our new data
            write_upon_reset (bool): Write the manifest file on each reset. (This is currently a JSON file, so writing it is somewhat expensive.)
            uid (Optional[str]): A unique id used as part of the suffix for the file. By default, uses os.getpid().
            mode (['evaluation', 'training']): Whether this is an evaluation or training episode.
        """
        if self.env.spec is None:
            logger.warn(
                "Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users."
            )
            env_id = "(unknown)"
        else:
            env_id = self.env.spec.id

        self.directory = os.path.abspath(directory)

        if not os.path.exists(self.directory):
            logger.info("Creating monitor directory %s", self.directory)
            os.makedirs(self.directory, exist_ok=True)

        if video_callable is None:
            video_callable = capped_cubic_video_schedule
        elif video_callable == False:
            video_callable = disable_videos
        elif not callable(video_callable):
            raise error.Error(
                "You must provide a function, None, or False for video_callable, not {}: {}"
                .format(type(video_callable), video_callable))
        self.video_callable = video_callable

        # Check on whether we need to clear anything
        if force:
            clear_monitor_files(self.directory)
        elif not resume:
            training_manifests = detect_training_manifests(self.directory)
            if len(training_manifests) > 0:
                raise error.Error(
                    """Trying to write to monitor directory {} with existing monitor files: {}.
 You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files."""
                    .format(directory, ", ".join(training_manifests[:5])))

        self._monitor_id = monitor_closer.register(self)

        self.enabled = True
        # We use the 'openai-gym' prefix to determine if a file is
        # ours
        self.file_prefix = FILE_PREFIX
        self.file_infix = "{}.{}".format(self._monitor_id,
                                         uid if uid else os.getpid())

        self.stats_recorder = stats_recorder.StatsRecorder(
            self.directory,
            "{}.episode_batch.{}".format(self.file_prefix, self.file_infix),
            autoreset=self.env_semantics_autoreset,
            env_id=env_id,
        )

        self.write_upon_reset = write_upon_reset

        if mode is not None:
            self._set_mode(mode)
Exemple #30
0
    def _load_level(self):
        """Configure and initialise the Doom game for the current level.

        An already-initialised game is closed and replaced first. The game is
        then configured either through ``self._customize_game`` (when that
        attribute exists and is callable) or from the ``DOOM_SETTINGS`` entry
        for ``self.level``, and finally initialised in either human
        (spectator) or algorithm (player) mode depending on ``self._mode``.

        Returns:
            In human mode, an all-zero ``uint8`` array shaped like
            ``self.observation_space``, returned after
            ``self._play_human_mode()`` finishes. Otherwise a copy of the
            current state's screen buffer.

        Raises:
            error.Error: In non-human mode, if VizDoom exits unexpectedly or
                reports an error during initialisation.
        """
        print("loading level")

        # A running game must be closed and replaced before reconfiguring.
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = vizdoom.DoomGame()

        if callable(getattr(self, '_customize_game', None)):
            # Custom level: the hook is responsible for the whole setup.
            self.level = -1
            self._customize_game()
        else:
            # Loading paths
            if not self.is_initialized:
                self.game.set_doom_game_path(self.loader.get_freedoom_path())

            # Common settings, all taken from this level's settings entry.
            settings = DOOM_SETTINGS[self.level]
            config_path = os.path.join(self.doom_dir,
                                       'assets/%s' % settings[CONFIG])
            self.game.load_config(config_path)
            scenario_path = self.loader.get_scenario_path(settings[SCENARIO])
            self.game.set_doom_scenario_path(scenario_path)
            level_map = settings[MAP]
            if level_map != '':
                self.game.set_doom_map(level_map)
            self.game.set_doom_skill(settings[DIFFICULTY])
            self.allowed_actions = settings[ACTIONS]
            self.game.set_screen_resolution(self.screen_resolution)

        self.previous_level = self.level
        self._closed = False

        # Human mode: visible window, spectator controls, blocking play loop.
        if 'human' == self._mode:
            self.game.add_game_args('+freelook 1')
            self.game.set_window_visible(True)
            self.game.set_mode(Mode.SPECTATOR)
            self.no_render = True
            with self.lock:
                self.game.init()
            self._start_episode()
            self.is_initialized = True
            self._play_human_mode()
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)

        # Algo mode: hidden window, player controls.
        self.game.set_window_visible(False)
        self.game.set_mode(Mode.PLAYER)
        self.no_render = False
        try:
            with self.lock:
                self.game.init()
        except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
            raise error.Error(
                'VizDoom exited unexpectedly. This is likely caused by a missing multiprocessing lock. '
                'To run VizDoom across multiple processes, you need to pass a lock when you configure the env '
                '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env '
                'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a '
                'singleton lock in memory.')
        self._start_episode()
        self.is_initialized = True
        return self.game.get_state().screen_buffer.copy()