コード例 #1
0
ファイル: network.py プロジェクト: sillle14/splendor
    def take_turn(self, game_state: GameState) -> Tuple[GameState, int]:
        """Choose the highest-scoring successor state for the current turn.

        Every candidate state returned by ``game_state.get_possible_moves()``
        is scored with ``self.feed_forward`` on its ``to_1P_array()`` encoding,
        and a copy of the best-scoring candidate is returned.

        Args:
            game_state: The state to move from.

        Returns:
            A ``(next_state, next_score)`` pair. ``next_state`` is a copy of
            the best-scoring candidate. ``next_score`` is that candidate's
            network score, unless ``game_state.has_player_won()`` returns a
            non-``None`` value, in which case that value is returned instead.

        Raises:
            ValueError: If ``game_state`` offers no possible moves.
        """
        # First, we need to calculate the outcome of the next game states.
        possible_next_states, _ = game_state.get_possible_moves()

        # Track the best candidate directly instead of keying a dict by score:
        # that required hashable scores and silently merged tied candidates.
        best_state = None
        best_score = None
        for state in possible_next_states:
            score = self.feed_forward(state.to_1P_array())
            # ">=" keeps the last of several tied candidates, matching the
            # overwrite-on-duplicate-key behaviour of the previous dict.
            if best_state is None or score >= best_score:
                best_state, best_score = state, score

        if best_state is None:
            raise ValueError("take_turn: game state has no possible moves")

        # Query the win status once; a non-None result overrides the score.
        winner = game_state.has_player_won()
        next_score = best_score if winner is None else winner
        return best_state.copy(), next_score
コード例 #2
0
# Hyperparameters and bookkeeping for the observation phase
D = deque()  # Register in which the observed actions get stored

observetime = 500  # How many timesteps we act on the game and watch the results
epsilon = 0.7  # Chance of choosing a random move
gamma = 0.9  # Discount applied to future reward: how much later steps matter
mb_size = 500  # Size of the learning minibatch

# STEP ONE: observe the game to learn what each action does
observation = GameState(["Michael"])  # Start a fresh game
state = observation.to_array()
done, turns = False, 0
# Observation loop: runs for `observetime` timesteps against the current game.
# NOTE(review): only the start of the loop body is visible here; the code that
# uses `possible_states` (and presumably advances `turns`) is not shown.
for t in range(observetime):
    # get_possible_moves() returns a sequence whose first element holds the
    # candidate moves; the remaining elements are ignored here.
    possible_moves = observation.get_possible_moves()[0]
    if (DEBUG):
        # Trace the first 10 timesteps, then every 20th one.
        if t < 10 or t % 20 == 0:
            print("t = " + str(t))
            print(observation)
            print("len = " + str(len(possible_moves)))
    # Start a new game on every 100th timestep once at least 100 turns have
    # been counted. `turns` is not incremented in the visible code --
    # presumably that happens further down; verify.
    if (t % 100 == 0 and turns >= 100):
        observation = GameState(["Michael"])  # Game begins
        state = observation.to_array()
        done = False
        turns = 0
        print("RESTART: t = " + str(t))
        # NOTE(review): `possible_moves` was computed from the previous
        # `observation` above and is not refreshed after this restart --
        # confirm that using the stale moves below is intended.

    # Convert every candidate move to its array form and collect them into
    # one NumPy array.
    possible_states = np.array(
        list(map(lambda move: move.to_array(), possible_moves)))