def run_game(self, game_state: GameState, game_id, debug=False):
    """Play one full game from *game_state*, training the model each turn.

    Repeatedly takes a turn, back-propagates the observed score against the
    state we moved *from*, and advances until the game reports it is over.

    Args:
        game_state: Initial GameState to play from; rebound to the new
            state after every turn.
        game_id: Identifier used only in the end-of-game debug printout.
        debug: When True, print per-move and end-of-game diagnostics.

    Returns:
        Tuple of (``game_state.has_player_won()``, number of turns taken).
    """
    start_time = time.time()
    while not game_state.is_game_over():
        next_state, next_score = self.take_turn(game_state)
        # Back propagate for new weights: train on the pre-move state's
        # array representation against the score of the chosen move.
        self.back_propagate(game_state.to_array(), next_score)
        game_state = next_state
        if debug:
            # BUG FIX: original referenced undefined `best_score`, which
            # raised NameError whenever debug=True; `next_score` is the
            # value actually produced by take_turn above.
            print(f"moved to state with score {next_score}")
    end_time = time.time()
    if debug:
        print(f"Game {game_id} ended with {game_state.has_player_won()}!"
              f" Turns: {game_state.turns}, Time: {end_time-start_time}s")
    return game_state.has_player_won(), game_state.turns
# Final layers of the value network.
# NOTE(review): `init=` is the pre-Keras-2 spelling of `kernel_initializer=`;
# kept as-is to match whatever Keras version the file targets — confirm.
model.add(Dense(10, init='uniform', activation='relu'))
model.add(Dense(1, init='uniform', activation='linear'))
# MSE regression objective. NOTE(review): 'accuracy' is not a meaningful
# metric for a linear regression output, though it is harmless — confirm intent.
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

# Parameters
D = deque()  # Register where the actions will be stored
observetime = 500  # Number of timesteps we will be acting on the game and observing results
epsilon = 0.7  # Probability of doing a random move
gamma = 0.9  # Discounted future reward. How much we care about steps further in time
mb_size = 500  # Learning minibatch size

# FIRST STEP: Knowing what each action does (Observing)
observation = GameState(["Michael"])  # Game begins
state = observation.to_array()  # Flat array encoding of the state, fed to the model
done = False
turns = 0
for t in range(observetime):
    # First element of get_possible_moves() is the list of legal moves here.
    possible_moves = observation.get_possible_moves()[0]
    if (DEBUG):
        # Print diagnostics densely at the start, then every 20 steps.
        if t < 10 or t % 20 == 0:
            print("t = " + str(t))
            print(observation)
            print("len = " + str(len(possible_moves)))
    # Periodically restart a long-running game so observation keeps sampling
    # fresh early-game states.
    # NOTE(review): `turns` is never incremented anywhere in this visible
    # chunk, so this restart can only fire if later (unseen) loop code
    # updates `turns` — verify against the rest of the file.
    if (t % 100 == 0 and turns >= 100):
        observation = GameState(["Michael"])  # Game begins
        state = observation.to_array()
        done = False
        turns = 0
        print("RESTART: t = " + str(t))