def take_turn(self, game_state: GameState) -> Tuple[GameState, int]:
    """Choose the successor state the network rates highest.

    Every legal next state is evaluated by ``self.feed_forward`` on its
    single-player array encoding; the state with the maximum evaluation is
    returned (as a copy). If the current game is already decided
    (``has_player_won()`` is not None), that terminal result is reported as
    the score instead of the network's evaluation.

    Returns:
        A ``(next_state, next_score)`` tuple.
    """
    # Score every reachable successor; map score -> originating state.
    candidates, _ = game_state.get_possible_moves()
    score_to_state = {}
    for candidate in candidates:
        score_to_state[self.feed_forward(candidate.to_1P_array())] = candidate

    top_score = max(score_to_state)

    # A decided game overrides the learned evaluation with the actual result.
    if game_state.has_player_won() is None:
        chosen_score = top_score
    else:
        chosen_score = game_state.has_player_won()

    chosen_state = score_to_state.get(top_score).copy()
    return chosen_state, chosen_score
# Parameters D = deque() # Register where the actions will be stored observetime = 500 # Number of timesteps we will be acting on the game and observing results epsilon = 0.7 # Probability of doing a random move gamma = 0.9 # Discounted future reward. How much we care about steps further in time mb_size = 500 # Learning minibatch size # FIRST STEP: Knowing what each action does (Observing) observation = GameState(["Michael"]) # Game begins state = observation.to_array() done = False turns = 0 for t in range(observetime): possible_moves = observation.get_possible_moves()[0] if (DEBUG): if t < 10 or t % 20 == 0: print("t = " + str(t)) print(observation) print("len = " + str(len(possible_moves))) if (t % 100 == 0 and turns >= 100): observation = GameState(["Michael"]) # Game begins state = observation.to_array() done = False turns = 0 print("RESTART: t = " + str(t)) possible_states = np.array( list(map(lambda move: move.to_array(), possible_moves)))