Exemple #1
0
  def __init__(self, observation_settings, opponent=None,
               reset_arm_after_move=True):
    """Builds a `TicTacToe` task on a checker board with 5 markers per player.

    Args:
      observation_settings: An `observations.ObservationSettings` namedtuple
        specifying configuration options for each category of observation.
      opponent: TicTacToeOpponent that generates the opponent's moves. When
        `None`, a `TicTacToeRandomOpponent` is created and used instead.
      reset_arm_after_move: Whether to reset arm to random position after every
        piece being placed on the board.
    """
    logic = tic_tac_toe_logic.TicTacToeGameLogic()
    # Fall back to the random opponent when the caller did not supply one.
    chosen_opponent = (
        tic_tac_toe_logic.TicTacToeRandomOpponent()
        if opponent is None else opponent)

    marker_options = observations.make_options(
        observation_settings, observations.MARKER_OBSERVABLES)
    marker_set = pieces.Markers(
        num_per_player=5, observable_options=marker_options)

    # Stored on the instance before the base-class constructor is invoked.
    self._reset_arm_after_move = reset_arm_after_move

    game_board = boards.CheckerBoard()
    super(TicTacToe, self).__init__(
        observation_settings=observation_settings,
        opponent=chosen_opponent,
        game_logic=logic,
        board=game_board,
        markers=marker_set)
Exemple #2
0
 def test_invalid_player_id(self):
   """Marking with player id 99 must raise `ValueError` with the exact message."""
   bad_player_id = 99
   marker_set = pieces.Markers(num_per_player=5)
   sim = mjcf.Physics.from_mjcf_model(marker_set.mjcf_model)
   # The expected text is built from the same template the module exposes.
   expected_message = pieces._INVALID_PLAYER_ID.format(1, bad_player_id)
   with self.assertRaisesWithLiteralMatch(ValueError, expected_message):
     marker_set.mark(physics=sim, player_id=bad_player_id, pos=(1, 2, 3))
Exemple #3
0
 def test_too_many_moves(self):
   """Placing one marker more than `num_per_player` raises `RuntimeError`."""
   player_id = 0
   num_per_player = 5
   marker_pos = (1, 2, 3)
   marker_set = pieces.Markers(num_per_player=num_per_player)
   sim = mjcf.Physics.from_mjcf_model(marker_set.mjcf_model)

   # Use up every marker allotted to this player; these calls must succeed.
   placed = 0
   while placed < num_per_player:
     marker_set.mark(physics=sim, player_id=player_id, pos=marker_pos)
     placed += 1

   # The next placement has no marker left to use.
   expected_message = pieces._NO_MORE_MARKERS_AVAILABLE.format(
       num_per_player, player_id)
   with self.assertRaisesWithLiteralMatch(RuntimeError, expected_message):
     marker_set.mark(physics=sim, player_id=player_id, pos=marker_pos)
Exemple #4
0
 def test_position_observable(self):
   """The position observable reports placed markers, zeros for unused slots."""
   num_per_player = 3
   marker_set = pieces.Markers(num_per_player=num_per_player)
   sim = mjcf.Physics.from_mjcf_model(marker_set.mjcf_model)
   # One list of marker positions per player; player 1 leaves one slot unused.
   placements = [
       [(0, 1, 2), (3, 4, 5), (6, 7, 8)],  # Player 0
       [(-1, 2, -3), (4, -5, 6)],  # Player 1
   ]
   # Expected layout is (player, marker, xyz); untouched entries stay zero.
   expected = np.zeros((2, num_per_player, 3), dtype=np.double)
   for player_id, player_positions in enumerate(placements):
     for xyz in player_positions:
       marker_set.mark(physics=sim, player_id=player_id, pos=xyz)
     expected[player_id, :len(player_positions)] = player_positions

   observed = marker_set.observables.position(sim)
   # The comparison is made against the expectation flattened to rows of xyz.
   np.testing.assert_array_equal(expected.reshape(-1, 3), observed)
Exemple #5
0
  def __init__(self, board_size, observation_settings, opponent=None,
               reset_arm_after_move=True):
    """Builds a `Go` task for a board of the requested size.

    Args:
      board_size: board size
      observation_settings: An `observations.ObservationSettings` namedtuple
        specifying configuration options for each category of observation.
      opponent: Go opponent to use for the opponent player actions. When
        `None`, a `GoGTPOpponent` configured with `_DEFAULT_OPPONENT_MIXTURE`
        is created and used instead.
      reset_arm_after_move: Whether to reset arm to random position after every
        piece being placed on the board.
    """
    logic = go_logic.GoGameLogic(board_size=board_size)

    # Fall back to the GTP opponent when the caller did not supply one.
    chosen_opponent = opponent
    if chosen_opponent is None:
      chosen_opponent = go_logic.GoGTPOpponent(
          board_size=board_size, mixture_p=_DEFAULT_OPPONENT_MIXTURE)

    self._last_valid_move_is_pass = False

    go_board = boards.GoBoard(boardsize=board_size)
    marker_options = observations.make_options(
        observation_settings, observations.MARKER_OBSERVABLES)
    # Each player is given twice as many markers as the board has points.
    marker_set = pieces.Markers(
        player_colors=(_BLACK, _WHITE),
        halfwidth=_GO_PIECE_SIZE,
        num_per_player=board_size*board_size*2,
        observable_options=marker_options,
        board_size=board_size)
    super(Go, self).__init__(
        observation_settings=observation_settings,
        opponent=chosen_opponent,
        game_logic=logic,
        board=go_board,
        markers=marker_set)
    self._reset_arm_after_move = reset_arm_after_move

    # Add an observable exposing the move history (to reconstruct game states)
    def _move_history(physics):
      del physics  # The history is read from the game logic only.
      return self._game_logic.get_move_history()

    history_observable = observable.Generic(_move_history)
    # Configured with the same options as the board-state observation category.
    history_observable.configure(
        **observation_settings.board_state._asdict())
    self._task_observables['move_history'] = history_observable