Exemple #1
0
    def uct_selection(self, states):
        """Choose the next state to explore using the UCT rule.

        Candidates whose play counter is still 0 take priority: one of them
        is returned at random.  When every candidate has been played at
        least once, the candidate with the highest score

            wins / plays + 1.4 * sqrt(log(total_plays) / plays) + prediction

        is returned, where ``total_plays`` is the sum of the candidates'
        play counters and ``prediction`` is the result of
        ``self.get_prediction_value(state[1:], rows, cols)``.

        Args:
            states: iterable of ``(move, state)`` pairs.

        Returns:
            A ``(state, move)`` tuple for the selected candidate.

        Raises:
            ValueError: from ``math.log`` when the summed play count is 0
                (for example when ``states`` is empty).
        """
        b = self.board

        # Build every statistics key exactly once; hashing the state is the
        # expensive part and this method runs on every simulation step.
        candidates = [(move, state, (b.current_player(state), hashable(state)))
                      for move, state in states]

        # TODO: skip this scan when all next states have already been visited.
        list_not_played = [(state, move)
                           for move, state, key in candidates
                           if self.plays[key] == 0]
        if list_not_played:
            return choice(list_not_played)

        log_total = math.log(sum(self.plays[key] for _, _, key in candidates))

        def uct_value(candidate):
            _, state, key = candidate
            plays = self.plays[key]
            exploitation = float(self.wins[key]) / plays
            exploration = 1.4 * math.sqrt(float(log_total) / plays)
            return (exploitation + exploration
                    + self.get_prediction_value(state[1:], b.rows, b.cols))

        # Rank on (value, move) only: ties are still broken by the move, but
        # the state objects themselves are never compared with each other.
        move, state, _ = max(candidates,
                             key=lambda c: (uct_value(c), c[0]))

        return state, move
    def simulate_random_game(self, state, player1):
        """Play random moves from ``state`` until the game ends and record
        the result for every state on the path.

        Each visited ``(player, hashable(state))`` key, the starting one
        included, has its play counter incremented.  Its win counter is
        incremented as well when ``player1`` equals the winner reported by
        ``self.board.winner``.

        Args:
            state: starting state; it is copied with ``np.copy`` before the
                playout begins.
            player1: value compared against the board's winner.
        """
        board = self.board
        state_copy = np.copy(state)
        states_simulated = [(board.current_player(state), hashable(state))]

        while not board.is_finished(state_copy):
            random_move = choice(board.legal_plays(state_copy))
            state_copy, _ = board.next_state(state_copy, random_move)
            states_simulated.append(
                (board.current_player(state_copy), hashable(state_copy)))

        # The outcome is the same for the whole path, so evaluate it once.
        player1_won = player1 == board.winner(state_copy)

        for key in states_simulated:
            # Create the counters on first sight.  This also covers the
            # starting state, which previously had to exist already.
            if key not in self.plays:
                self.plays[key] = 0
                self.wins[key] = 0
            self.plays[key] += 1
            if player1_won:
                self.wins[key] += 1
    def get_best_move(self, player, next_states):
        """Pick the candidate with the highest recorded win rate.

        The win rate of a candidate is ``wins / plays`` looked up under the
        key ``(player, hashable(state))``.  A candidate without a play entry
        is scored as ``wins / 1``; a candidate whose play counter is stored
        as 0 is scored 0.0 instead of raising ``ZeroDivisionError``.

        Args:
            player: player component of the statistics key.
            next_states: iterable of ``(move, state)`` pairs.

        Returns:
            ``(state, best_move, r, c, o)`` where ``r, c, o`` are the values
            returned by ``self.board.translate_to_coord(best_move)``.

        Raises:
            ValueError: from ``max`` when ``next_states`` is empty.
        """
        def win_rate_of(state):
            key = (player, hashable(state))
            plays = self.plays.get(key, 1)
            if plays == 0:
                # Counter exists but was never incremented: no information.
                return 0.0
            # float() keeps this a true division on Python 2 as well.
            return float(self.wins.get(key, 0)) / plays

        scored = [(win_rate_of(state), move, state)
                  for move, state in next_states]

        # Rank on (win rate, move) so state objects are never compared.
        win_rate, best_move, state = max(scored,
                                         key=lambda entry: (entry[0], entry[1]))

        print("Picked move " + str(best_move) + " with win rate of " +
              str(win_rate))
        r, c, o = self.board.translate_to_coord(best_move)
        return state, best_move, r, c, o
    def expand_state(self, current_state):
        """Generate all successors of ``current_state`` and zero their
        statistics.

        Args:
            current_state: state handed to ``self.board.legal_plays`` and
                ``self.board.next_state``.

        Returns:
            A list of ``(move_done, new_state)`` pairs, one per legal move,
            in the order the board reports the moves.
        """
        board = self.board
        successors = []

        for candidate in board.legal_plays(current_state):
            child, played = board.next_state(current_state, candidate)
            successors.append((played, child))

            # NOTE(review): plain assignment, so counters already stored for
            # this key are overwritten with 0 -- confirm this is intended.
            key = (board.current_player(child), hashable(child))
            self.plays[key] = 0
            self.wins[key] = 0

        return successors
Exemple #5
0
    def simulate_random_game(self, player1, time_limit):
        """Run one time-boxed simulation starting from ``self.next_states``.

        The first state is chosen with ``self.uct_selection`` from
        ``self.next_states``; each following state is chosen with
        ``self.uct_selection`` from the result of ``self.expand_state``.
        The loop stops when the board reports the game as finished or when
        ``time_limit`` seconds have elapsed since the call started.

        Statistics are only updated when the game actually finished: every
        visited key gets one more play, and one more win when ``player1``
        equals the board's winner.

        Args:
            player1: value compared against ``self.board.winner(...)``.
            time_limit: time budget in seconds for this simulation.
        """
        started = time.time()
        board = self.board

        state, _ = self.uct_selection(self.next_states)
        visited = [(board.current_player(state), hashable(state))]
        state_copy = np.copy(state)

        while True:
            if board.is_finished(state_copy):
                break
            if not (time.time() - started) < time_limit:
                break

            state_copy, _ = self.uct_selection(self.expand_state(state_copy))

            state_hash = hashable(state_copy)
            visited.append((board.current_player(state_copy), state_hash))

        # An unfinished game (time ran out) leaves the statistics untouched.
        if not board.is_finished(state_copy):
            return

        player1_won = player1 == board.winner(state_copy)
        for key in visited:
            self.plays[key] += 1
            if player1_won:
                self.wins[key] += 1
Exemple #6
0
    def get_best_move(self, next_states):
        """Return the candidate with the best recorded win rate.

        A random candidate is used as the fallback; it is only replaced by
        a candidate whose ``wins / plays`` ratio is strictly greater than
        the best ratio seen so far (starting at 0).  Candidates whose play
        counter is 0 are skipped; a missing play counter counts as 1 and a
        missing win counter as 0.

        Args:
            next_states: sequence of ``(move, state)`` pairs.

        Returns:
            ``(best_state, best_move, r, c, o)`` where ``r, c, o`` come from
            ``self.board.translate_to_coord(best_move)``.
        """
        board = self.board
        best_move, best_state = choice(next_states)
        best_win = 0

        for move, state in next_states:
            key = (board.current_player(state), hashable(state))
            plays = self.plays.get(key, 1)
            if plays == 0:
                continue

            rate = float(self.wins.get(key, 0)) / plays
            if rate > best_win:
                best_win, best_move, best_state = rate, move, state

        r, c, o = board.translate_to_coord(best_move)
        return best_state, best_move, r, c, o
    def simulate_random_game(self, state, player1):
        """Play random moves from ``state`` to the end of the game and
        record the result on the starting state only.

        The play counter stored under
        ``(current_player(state), hashable(state))`` is incremented; the
        win counter under the same key is incremented when ``player1``
        equals the winner reported by the board.

        Args:
            state: starting state; a ``np.copy`` of it is played out.
            player1: value compared against ``self.board.winner(...)``.
        """
        board = self.board
        playout = np.copy(state)
        mover = board.current_player(state)

        while True:
            if board.is_finished(playout):
                break
            picked = choice(board.legal_plays(playout))
            playout, _ = board.next_state(playout, picked)

        winner = board.winner(playout)
        key = (mover, hashable(state))

        self.plays[key] += 1
        if player1 == winner:
            self.wins[key] += 1