Example #1
0
def average_score(strategy, num_games=100):
  """Plays num_games games with strategy and returns the average score.

  Args:
    strategy: Strategy handed unchanged to play.play() for every game.
    num_games: Number of games to play; must be at least 1. Defaults to 100.

  Returns:
    Mean of the per-game scores, as computed by np.mean.

  Raises:
    ValueError: If num_games is less than 1.
  """

  # np.mean of an empty list yields nan plus a RuntimeWarning; fail loudly
  # instead of returning a meaningless average.
  if num_games < 1:
    raise ValueError("num_games must be at least 1, got %r" % (num_games,))

  scores = []
  for _ in range(num_games):
    # play.play() returns a pair; only the first element (the score) is used.
    score, _ = play.play(strategy, allow_unavailable_action=False)
    scores.append(score)
  return np.mean(scores)
Example #2
0
def average_score(strategy, num_games=100):
    """Plays num_games games with strategy and returns the average score.

    Args:
        strategy: Strategy handed unchanged to play.play() for every game.
        num_games: Number of games to play; must be at least 1.
            Defaults to 100.

    Returns:
        Mean of the per-game scores, as computed by np.mean.

    Raises:
        ValueError: If num_games is less than 1.
    """

    # np.mean of an empty list yields nan plus a RuntimeWarning; fail loudly
    # instead of returning a meaningless average.
    if num_games < 1:
        raise ValueError("num_games must be at least 1, got %r" % (num_games,))

    scores = []
    for _ in range(num_games):
        # play.play() returns a pair; only the first element (the score) is
        # used.
        score, _ = play.play(strategy, allow_unavailable_action=False)
        scores.append(score)
    return np.mean(scores)
  def collect(self, strategy, num_games=1):
    """Plays num_games games with strategy and returns the sampled experiences.

    The experiences of each game are passed through self.deduplicate(). Every
    remaining experience is then kept only when a fresh np.random.rand() draw
    is below self.get_keep_probability(index, count), where index is its
    position in the deduplicated sequence and count is that sequence's length.
    """

    collected = []
    for _ in range(num_games):
      _, game_experiences = play.play(strategy, allow_unavailable_action=False)
      unique_experiences = self.deduplicate(game_experiences)
      total = len(unique_experiences)

      kept = []
      for position, experience in enumerate(unique_experiences):
        # Draw first, then query the keep probability, one draw per experience.
        draw = np.random.rand()
        if draw < self.get_keep_probability(position, total):
          kept.append(experience)
      collected.extend(kept)
    return collected
Example #4
0
  def collect(self, strategy, num_games=1):
    """Plays num_games games with strategy and returns the sampled experiences.

    The experiences of each game are passed through self.deduplicate(). Every
    remaining experience is then kept only when a fresh np.random.rand() draw
    is below self.get_keep_probability(index, count), where index is its
    position in the deduplicated sequence and count is that sequence's length.
    """

    collected = []
    for _ in range(num_games):
      _, game_experiences = play.play(strategy, allow_unavailable_action=False)
      unique_experiences = self.deduplicate(game_experiences)
      total = len(unique_experiences)

      kept = []
      for position, experience in enumerate(unique_experiences):
        # Draw first, then query the keep probability, one draw per experience.
        draw = np.random.rand()
        if draw < self.get_keep_probability(position, total):
          kept.append(experience)
      collected.extend(kept)
    return collected
Example #5
0
def test_play(game_class_mock):
  """Checks that play() drives the mocked game and records each transition."""
  base_board = np.ones((4, 4))
  boards = [base_board, base_board * 2, base_board * 3]

  # Script the return values of the mocked game instance.
  mocked_game = game_class_mock.return_value
  mocked_game.state.side_effect = boards
  mocked_game.game_over.side_effect = [False, False, True]
  mocked_game.available_actions.side_effect = [
      [1, 2, 3], [0, 1, 2], [0, 1, 2], []]
  mocked_game.do_action.side_effect = [1, 2]
  mocked_game.score.return_value = 1234

  strategy = Mock(side_effect=[1, 2])

  score, experiences = play(strategy, allow_unavailable_action=False)

  mocked_game.do_action.assert_has_calls([call(1), call(2)])

  # The strategy arguments are checked by hand: comparing numpy arrays with
  # == yields an array, so the states need an explicit .all().
  expected_strategy_args = [
      (boards[0], [1, 2, 3]),
      (boards[1], [0, 1, 2]),
  ]
  for call_index, (board, actions) in enumerate(expected_strategy_args):
    positional_args = strategy.call_args_list[call_index][0]
    assert (positional_args[0] == board).all()
    assert positional_args[1] == actions

  assert score == 1234

  assert len(experiences) == 2

  # One row per expected experience:
  # (state, action, reward, next_state, game_over, next available actions)
  expected_experiences = [
      (boards[0], 1, 1, boards[1], False, [0, 1, 2]),
      (boards[1], 2, 2, boards[2], True, []),
  ]
  for position, expected in enumerate(expected_experiences):
    state, action, reward, next_state, game_over, next_actions = expected
    recorded = experiences[position]
    assert (recorded.state == state).all()
    assert recorded.action == action
    assert recorded.reward == reward
    assert (recorded.next_state == next_state).all()
    assert recorded.game_over == game_over
    assert recorded.next_state_available_actions == next_actions
Example #6
0
def test_play(game_class_mock):
    """Checks that play() drives the mocked game and records each transition."""
    base_board = np.ones((4, 4))
    boards = [base_board, base_board * 2, base_board * 3]

    # Script the return values of the mocked game instance.
    mocked_game = game_class_mock.return_value
    mocked_game.state.side_effect = boards
    mocked_game.game_over.side_effect = [False, False, True]
    mocked_game.available_actions.side_effect = [
        [1, 2, 3], [0, 1, 2], [0, 1, 2], []]
    mocked_game.do_action.side_effect = [1, 2]
    mocked_game.score.return_value = 1234

    strategy = Mock(side_effect=[1, 2])

    score, experiences = play(strategy, allow_unavailable_action=False)

    mocked_game.do_action.assert_has_calls([call(1), call(2)])

    # The strategy arguments are checked by hand: comparing numpy arrays with
    # == yields an array, so the states need an explicit .all().
    expected_strategy_args = [
        (boards[0], [1, 2, 3]),
        (boards[1], [0, 1, 2]),
    ]
    for call_index, (board, actions) in enumerate(expected_strategy_args):
        positional_args = strategy.call_args_list[call_index][0]
        assert (positional_args[0] == board).all()
        assert positional_args[1] == actions

    assert score == 1234

    assert len(experiences) == 2

    # One row per expected experience:
    # (state, action, reward, next_state, game_over, next available actions)
    expected_experiences = [
        (boards[0], 1, 1, boards[1], False, [0, 1, 2]),
        (boards[1], 2, 2, boards[2], True, []),
    ]
    for position, expected in enumerate(expected_experiences):
        state, action, reward, next_state, game_over, next_actions = expected
        recorded = experiences[position]
        assert (recorded.state == state).all()
        assert recorded.action == action
        assert recorded.reward == reward
        assert (recorded.next_state == next_state).all()
        assert recorded.game_over == game_over
        assert recorded.next_state_available_actions == next_actions
Example #7
0
def play_single_game(train_dir):
  """Plays one game and prints its score.

  The strategy comes from make_greedy_strategy(train_dir, True); the game is
  run through play.play() with unavailable actions disallowed.
  """

  greedy_strategy = make_greedy_strategy(train_dir, True)
  final_score, _ = play.play(greedy_strategy, allow_unavailable_action=False)
  print(final_score)
Example #8
0
def play_single_game(train_dir):
    """Plays one game and prints its score.

    The strategy comes from make_greedy_strategy(train_dir, True); the game is
    run through play.play() with unavailable actions disallowed.
    """

    greedy_strategy = make_greedy_strategy(train_dir, True)
    final_score, _ = play.play(greedy_strategy, allow_unavailable_action=False)
    print(final_score)