Code example #1
File: network.py  Project: sillle14/splendor
    def run_game(self, game_state: GameState, game_id, debug=False):
        start_time = time.time()
        while not game_state.is_game_over():

            next_state, next_score = self.take_turn(game_state)
            # Backpropagate the predicted score to update the network weights.
            self.back_propagate(game_state.to_array(), next_score)
            game_state = next_state
            if debug:
                print(f"moved to state with score {next_score}")
        end_time = time.time()
        if debug:
            print(f"Game {game_id} ended with {game_state.has_player_won()}!"
                  f" Turns: {game_state.turns}, Time: {end_time-start_time}s")
        return game_state.has_player_won(), game_state.turns
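
For context, run_game might be driven like the minimal sketch below. The enclosing class name Network and its no-argument constructor are assumptions, not shown in the snippet; GameState(["Michael"]) follows the usage in example #2.

# Hypothetical driver: play a few self-play games and report the results.
net = Network()
for game_id in range(5):
    won, turns = net.run_game(GameState(["Michael"]), game_id, debug=True)
    print(f"Game {game_id}: result={won}, turns={turns}")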
Code example #2
from collections import deque
from keras.models import Sequential
from keras.layers import Dense

# The snippet begins mid-definition; creating the model here is an assumption.
# `init=` in the original is Keras 1 syntax; `kernel_initializer` is the
# current name of that argument.
model = Sequential()
model.add(Dense(10, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='linear'))

model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

# Parameters
DEBUG = True  # Verbose logging flag (assumed here; defined elsewhere in the original file)
D = deque()  # Replay memory where experience tuples will be stored

observetime = 500  # Number of timesteps we will act in the game and observe results
epsilon = 0.7  # Probability of taking a random move (exploration rate)
gamma = 0.9  # Discount factor: how much we value rewards further in the future
mb_size = 500  # Size of the learning minibatch sampled from D
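
To make gamma concrete: a reward received k steps in the future contributes gamma**k of its value to the current state's estimate, so with gamma = 0.9 distant rewards still matter but nearer ones dominate.

# Illustration only (not from the original file): how gamma = 0.9 discounts
# a reward received k steps in the future.
for k in range(5):
    print(f"reward {k} steps away is worth {0.9 ** k:.3f} of an immediate one")
# 1.000, 0.900, 0.810, 0.729, 0.656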

# FIRST STEP: Knowing what each action does (Observing)
observation = GameState(["Michael"])  # Game begins
state = observation.to_array()
done = False
turns = 0
for t in range(observetime):
    possible_moves = observation.get_possible_moves()[0]
    if DEBUG:
        if t < 10 or t % 20 == 0:
            print(f"t = {t}")
            print(observation)
            print(f"len = {len(possible_moves)}")
    if t % 100 == 0 and turns >= 100:
        # Periodically restart games that have run too long.
        observation = GameState(["Michael"])  # Game begins
        state = observation.to_array()
        done = False
        turns = 0
        print(f"RESTART: t = {t}")