Example #1
    def policy(self, game):
        # Run the CNN on the sign-flipped board to get a move distribution.
        with self._graph.as_default():
            predictions = self._model.predict(
                -game.board().reshape(1, 15, 15, 1))

        # Add a 0.1 bonus to every empty cell: a soft nudge toward legal
        # moves rather than a hard mask.
        available = numpy.zeros((225, 1))
        positions = util.list_positions(game.board(), renju.Player.NONE)

        for pos in positions:
            available[pos[0] * 15 + pos[1]] = 1
        arr = predictions.T + available * 0.1

        code_move = numpy.argmax(arr)
        print(self._name + ':', util.to_move([code_move // 15,
                                              code_move % 15]))
        return arr
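Every example maps (row, col) to a flat index with pos[0] * 15 + pos[1] and back with // 15 and % 15. The util helpers themselves never appear in this listing; a minimal sketch of what list_positions and to_move might look like (the coordinate lettering is an assumption, not the repository's actual implementation):

import numpy

def list_positions(board, player_value=0):
    # Hypothetical stand-in for util.list_positions: every (row, col)
    # whose cell equals player_value (0 standing in for renju.Player.NONE).
    return numpy.argwhere(board == player_value)

def to_move(pos):
    # Hypothetical stand-in for util.to_move: encode (row, col) as a
    # coordinate string such as 'h8' (letter scheme assumed).
    return 'abcdefghijklmno'[pos[1]] + str(pos[0] + 1)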
Example #2
    def process_move(self, move):
        # Validate the move, draw it, and record it; returns False when the
        # move is rejected or the game has just been won.

        if not self.game.is_posible_move(move):
            self.board_canvas.print_message("Move is invalid!")
            return False

        self.board_canvas.place_move(move, repr(self.game._player))
        self.board_canvas.print_message(repr(self.game._player) +
                                        " move is: " + util.to_move(move))

        if not self.game.move(move):
            self.finish_game("Player " + repr(self.game._player) +
                             " wins")
            return False

        return True
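A minimal sketch of a driver loop around process_move; get_next_move is a hypothetical input source, and note that the method above returns False both for a rejected move and for a finished game:

    def run(self):
        # Keep processing moves until one is rejected or the game ends.
        while True:
            move = self.get_next_move()  # hypothetical input source
            if not self.process_move(move):
                break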
Example #3
def choose_move(game):
    # Load two CNN policies plus their graphs (TF1-style Keras pattern).
    model = load_model('modelusual.h5')
    model_graph = tf.get_default_graph()
    model_2 = load_model('modelsuper.h5')
    model_graph_2 = tf.get_default_graph()
    SUMEKENOV = CnnAgent(color='black',
                         name='SUMEKENOV',
                         model=(model, model_graph))
    SUMEKENOV_2 = CnnAgent(color='black',
                           name='SUMEKENOV_2',
                           model=(model_2, model_graph_2))
    # Ensemble: add the two policy arrays and play the jointly best move.
    arr = SUMEKENOV.policy(game)
    arr_2 = SUMEKENOV_2.policy(game)
    arr_final = arr + arr_2
    mv = numpy.argmax(arr_final)
    return util.to_move([mv // 15, mv % 15])
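choose_move reloads both .h5 files and rebuilds both agents on every call, which dominates its runtime; a sketch of the same ensemble with the models cached at import time instead (still the TF1-style load_model/get_default_graph pattern, agent construction unchanged):

model = load_model('modelusual.h5')
model_graph = tf.get_default_graph()
model_2 = load_model('modelsuper.h5')
model_graph_2 = tf.get_default_graph()

def choose_move_cached(game):
    # Same two-model ensemble, without per-call model loading.
    arr = CnnAgent(color='black', name='SUMEKENOV',
                   model=(model, model_graph)).policy(game)
    arr_2 = CnnAgent(color='black', name='SUMEKENOV_2',
                     model=(model_2, model_graph_2)).policy(game)
    mv = numpy.argmax(arr + arr_2)
    return util.to_move([mv // 15, mv % 15])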
Example #4
    def policy(self, game):
        if self.flag:
            # Re-encode the board for this CNN: -1 stones become 0.5,
            # +1 stones become 1.0, empty cells stay 0.
            newBoard = numpy.zeros((1, 15, 15, 1))
            for i in range(15):
                for j in range(15):
                    if game.board()[i][j] == -1:
                        newBoard[0, i, j, 0] = 0.5
                    if game.board()[i][j] == 1:
                        newBoard[0, i, j, 0] = 1
            with self._graph.as_default():
                predictions = self._model.predict(newBoard)

            # Hard mask: occupied cells get probability zero.
            available = numpy.zeros((225, 1))
            positions = util.list_positions(game.board(), renju.Player.NONE)

            for pos in positions:
                available[pos[0] * 15 + pos[1]] = 1

            arr = predictions.T * available

            code_move = numpy.argmax(arr)
            print(self._name + ':',
                  util.to_move([code_move // 15, code_move % 15]))
            return arr
        else:
            # Manual mode: read a move from stdin and return a one-hot policy.
            move = input()
            pos = util.to_pos(move)

            probs = numpy.zeros(game.shape)
            probs[pos] = 1.0

            return probs
Example #5
    def policy(self, game):
        # On an empty board, black opens with a fixed stone.
        if (self._color == 'black' and len(
                util.list_positions(game.board(), renju.Player.NONE)) == 225):
            res = numpy.zeros((225, 1))
            res[142] = 1
            return res.reshape((1, 225))
        # Sum the predictions of every (model, graph) pair in the ensemble.
        predictions = numpy.zeros((1, 225))
        for model in self._models:
            with model[1].as_default():
                predictions += model[0].predict(
                    -game.board().reshape(1, 15, 15, 1))
        # Soft legality bonus: +1 on every empty cell.
        available = numpy.zeros((225, 1))
        positions = util.list_positions(game.board(), renju.Player.NONE)
        for pos in positions:
            available[pos[0] * 15 + pos[1]] = 1
        arr = predictions.T + available
        code_move = numpy.argmax(arr)
        if self._verbose:
            print(self._name + ':',
                  util.to_move([code_move // 15, code_move % 15]))
        return arr
Example #6
    def policy(self, game):
        from time import time
        if (self._color == 'black' and len(
                util.list_positions(game.board(), renju.Player.NONE)) == 225):
            res = numpy.zeros((225, 1))
            res[112] = 1
            return res.reshape((1, 225))

        board = numpy.copy(-game.board())
        available = numpy.zeros(225)
        checker = numpy.zeros(225)

        # For every empty cell, try both colours: if either placement gives
        # five in a row, the cell is an immediate win or a forced block.
        for parsed_pos in util.list_positions(game.board(), renju.Player.NONE):
            pos = parsed_pos[0] * 15 + parsed_pos[1]
            parsed_pos = tuple(parsed_pos)

            board[parsed_pos] = 1
            if util.check(board, parsed_pos):
                checker[pos] += 1
            board[parsed_pos] = -1
            if util.check(board, parsed_pos):
                checker[pos] += 1

            board[parsed_pos] = 0
            available[pos] = 1

        start = time()
        with self._graph.as_default():
            predictions = self._model.predict(board.reshape(1, 15, 15, 1))[0]

        # Mask illegal cells, then boost the immediate wins/blocks found above.
        arr = (predictions * available) * (1 + checker)

        code_move = numpy.argmax(arr)
        if (self._verbose):
            print(self._name + ':',
                  util.to_move([code_move // 15, code_move % 15]),
                  time() - start)
        return arr.reshape(1, 225)
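The multiplicative mask plus the (1 + checker) factor lets an immediate win or block outweigh a merely popular cell; a tiny worked illustration with toy numbers (not model output):

import numpy

predictions = numpy.array([0.4, 0.3, 0.3])
available = numpy.array([1.0, 1.0, 0.0])  # third cell is occupied
checker = numpy.array([0.0, 1.0, 0.0])    # second cell gives five in a row
arr = (predictions * available) * (1 + checker)
print(arr)                # [0.4 0.6 0. ]
print(numpy.argmax(arr))  # 1 -- the winning move beats the higher prior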
Example #7
    def policy(self, game):
        start = time()
        if (self._color == 'black' and len(
                util.list_positions(game.board(), renju.Player.NONE)) == 225):
            res = numpy.zeros((225, 1))
            res[112] = 1
            return res.reshape((1, 225))

        if self._color == 'black':
            with self._black_graph.as_default():
                predictions = self._black_model.predict(
                    -game.board().reshape(1, 15, 15, 1),
                    batch_size=1,
                    verbose=0)[0]
        else:
            with self._white_graph.as_default():
                predictions = self._white_model.predict(
                    -game.board().reshape(1, 15, 15, 1),
                    batch_size=1,
                    verbose=0)[0]

        # res accumulates rollout rewards per move; n counts visits.
        res = numpy.zeros(225)
        n = numpy.zeros(225)

        def rollout(board,
                    temp_high,
                    color,
                    checking_pos,
                    max_high=self._high,
                    gamma=self._gamma):
            # Sample one ply from the colour-matched model and credit
            # checking_pos with a depth-discounted reward when the line ends.
            temp_predictions = 0
            if color == 'black':
                with self._black_graph.as_default():
                    temp_predictions = self._black_model.predict(board.reshape(
                        1, 15, 15, 1),
                                                                 batch_size=1,
                                                                 verbose=0)[0]
            else:
                with self._white_graph.as_default():
                    temp_predictions = self._white_model.predict(board.reshape(
                        1, 15, 15, 1),
                                                                 batch_size=1,
                                                                 verbose=0)[0]

            temp_pos = numpy.random.choice(225, p=temp_predictions)
            temp_parsed_pos = numpy.unravel_index(temp_pos, (15, 15))

            if (board[temp_parsed_pos] != 0):
                if (temp_high % 2):
                    res[checking_pos] += gamma**(temp_high - 1)
                else:
                    res[checking_pos] -= gamma**(temp_high - 1)
                return

            if (util.check(board, temp_parsed_pos)):
                if (temp_high % 2):
                    res[checking_pos] -= gamma**(temp_high - 1)
                else:
                    res[checking_pos] += gamma**(temp_high - 1)
                return

            board[temp_parsed_pos] = 1
            next_color = 'white'
            if color == 'white':
                next_color = 'black'
                board[temp_parsed_pos] = -1

            if (temp_high < max_high):
                rollout(board, temp_high + 1, next_color, checking_pos)
            return

        # Root sampling loop: draw candidate moves from the prior until the
        # time or sample budget runs out.
        i = 0
        while (time() - start < self._timeout and i < self._samples):
            i += 1
            pos = numpy.random.choice(225, p=predictions)
            parsed_pos = numpy.unravel_index(pos, (15, 15))

            board = numpy.copy(-game.board())
            n[pos] += 1

            if (board[parsed_pos] != 0):
                res[pos] -= 1
                continue

            if (util.check(board, parsed_pos)):
                res[pos] = res.max() + 1
                break

            board[parsed_pos] = 1
            color = 'white'
            if self._color == 'white':
                color = 'black'
                board[parsed_pos] = -1

            rollout(board, 1, color, pos)

        # Blend rollout returns with the prior, normalised by visit count;
        # the boolean term adds 1 to moves whose reward exceeds half their
        # visit count.
        values = (res + 10 * predictions) / (1 + n) + (res > 0.5 * n)

        if (self._verbose):
            code_move = numpy.argmax(values.reshape(1, 225))
            print(self._name + ':',
                  util.to_move([code_move // 15, code_move % 15]),
                  'working time:', time() - start, 'iterations:', i)

        #values -= values.mean()
        #values /= values.std()
        return values.reshape(1, 225)
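A small numeric check of the scoring line values = (res + 10 * predictions) / (1 + n) + (res > 0.5 * n), with made-up rollout statistics:

import numpy

res = numpy.array([3.0, -1.0, 0.0])          # accumulated rollout rewards
n = numpy.array([4.0, 2.0, 0.0])             # visit counts
predictions = numpy.array([0.2, 0.5, 0.3])   # CNN prior
values = (res + 10 * predictions) / (1 + n) + (res > 0.5 * n)
print(values)  # [2.         1.33333333 3.        ]
# The unvisited third cell still scores through its prior; the first
# earns the +1 bonus because its reward exceeds half its visit count.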
Example #8
    def policy(self, game):
        if (self._color == 'black' and len(
                util.list_positions(game.board(), renju.Player.NONE)) == 225):
            res = numpy.zeros((225, 1))
            res[142] = 1
            return res.reshape((1, 225))
        predictions = 0
        temp_high = 0

        if self._color == 'black':
            with self._black_graph.as_default():
                predictions = self._black_model.predict(
                    -game.board().reshape(1, 15, 15, 1),
                    batch_size=1,
                    verbose=0)[0]
        else:
            with self._white_graph.as_default():
                predictions = self._white_model.predict(
                    -game.board().reshape(1, 15, 15, 1),
                    batch_size=1,
                    verbose=0)[0]

        results = dict()
        danger = set()

        def check_state_white(board, pos, checking_pos, temp_prob, temp_high,
                              results):
            # Expand one ply for white, accumulating log-probabilities;
            # at max depth, record the best full-path score for checking_pos.
            temp_high += 1
            parsed_pos = numpy.unravel_index(pos, (15, 15))

            if (temp_high > self._high):
                if (checking_pos not in danger):
                    if (results[checking_pos] < temp_prob):
                        results[checking_pos] = temp_prob
                return

            board[parsed_pos] = 1
            white_predict = 0
            with self._white_graph.as_default():
                white_predict = self._white_model.predict(
                    board.reshape(1, 15, 15, 1))[0]
            top_args_white = numpy.argsort(white_predict)[::-1][:self._width]

            for pos in top_args_white:
                if (util.check(-board, parsed_pos)):
                    if (self._color == 'black'):
                        danger.add(checking_pos)
                        continue
                if checking_pos not in danger:
                    check_state_black(
                        numpy.copy(board), pos, checking_pos,
                        temp_prob + numpy.log(white_predict[pos]), temp_high,
                        results)

        def check_state_black(board, pos, checking_pos, temp_prob, temp_high,
                              results):
            # Mirror of check_state_white: expand one ply for black.
            temp_high += 1
            parsed_pos = numpy.unravel_index(pos, (15, 15))

            if (temp_high > self._high):
                if (checking_pos not in danger):
                    if (results[checking_pos] < temp_prob):
                        results[checking_pos] = temp_prob
                return

            board[parsed_pos] = -1
            black_predict = 0
            with self._black_graph.as_default():
                black_predict = self._black_model.predict(
                    board.reshape(1, 15, 15, 1))[0]
            top_args_black = numpy.argsort(black_predict)[::-1][:self._width]

            for pos in top_args_black:
                if (util.check(-board, parsed_pos)):
                    if (self._color == 'white'):
                        danger.add(checking_pos)
                        continue
                if checking_pos not in danger:
                    check_state_white(
                        numpy.copy(board), pos, checking_pos,
                        temp_prob + numpy.log(black_predict[pos]), temp_high,
                        results)

        # Beam search: expand only the width most probable root moves.
        top_args = numpy.argsort(predictions)[::-1][:self._width]
        for pos in top_args:
            results[pos] = -100000000000
            if (self._verbose):
                print(numpy.unravel_index(pos, (15, 15)), end=' - ')
            if (self._color == 'black'):
                check_state_white(numpy.copy(-game.board()), pos, pos,
                                  numpy.log(predictions[pos]), temp_high,
                                  results)
            else:
                check_state_black(numpy.copy(-game.board()), pos, pos,
                                  numpy.log(predictions[pos]), temp_high,
                                  results)
        # Pick the highest-scoring move never flagged as dangerous.
        max_sum_log, best_move = -100000000, -1
        for move in results:
            if move not in danger:
                if max_sum_log < results[move]:
                    max_sum_log = results[move]
                    best_move = move
        res = numpy.zeros((225, 1))
        res[best_move] = 1
        code_move = numpy.argmax(res.reshape(1, 225))
        if (self._verbose):
            print(self._name + ':',
                  util.to_move([code_move // 15, code_move % 15]))
        return res.reshape(1, 225)
Example #9
    def policy(self, game):
        start = time()
        if (self._color == 'black' and len(
                util.list_positions(game.board(), renju.Player.NONE)) == 225):
            res = numpy.zeros((225, 1))
            res[112] = 1
            return res.reshape((1, 225))

        checker = numpy.zeros(225)
        available = numpy.zeros(225)
        board = numpy.copy(-game.board())
        # Flag empty cells that give an immediate five for either colour.
        for parsed_pos in util.list_positions(game.board(), renju.Player.NONE):
            pos = parsed_pos[0] * 15 + parsed_pos[1]
            parsed_pos = tuple(parsed_pos)

            board[parsed_pos] = 1
            if util.check(board, parsed_pos):
                checker[pos] += 1
            board[parsed_pos] = -1
            if util.check(board, parsed_pos):
                checker[pos] += 1
            board[parsed_pos] = 0
            available[pos] = 1

        if self._color == 'black':
            with self._black_graph.as_default():
                predictions = self._black_model.predict(board.reshape(
                    1, 15, 15, 1),
                                                        batch_size=1,
                                                        verbose=0)[0]
        else:
            with self._white_graph.as_default():
                predictions = self._white_model.predict(board.reshape(
                    1, 15, 15, 1),
                                                        batch_size=1,
                                                        verbose=0)[0]

        # logs keeps the best path log-probability per root move; res the
        # rollout rewards; n the visit counts.
        logs = numpy.zeros(225, dtype=numpy.float32)
        res = numpy.zeros(225, dtype=numpy.float32)
        n = numpy.zeros(225, dtype=numpy.float32)

        def rollout(board,
                    temp_high,
                    color,
                    checking_pos,
                    sum_logs,
                    max_high=self._high,
                    gamma=self._gamma,
                    fine=self._fine,
                    bonus=self._bonus):
            # Like the rollout above, but also tracks the summed log-
            # probability of the sampled line, recorded in logs at max depth.
            temp_predictions = 0
            if color == 'black':
                with self._black_graph.as_default():
                    temp_predictions = self._black_model.predict(board.reshape(
                        1, 15, 15, 1),
                                                                 batch_size=1,
                                                                 verbose=0)[0]
            else:
                with self._white_graph.as_default():
                    temp_predictions = self._white_model.predict(board.reshape(
                        1, 15, 15, 1),
                                                                 batch_size=1,
                                                                 verbose=0)[0]

            temp_pos = numpy.random.choice(225, p=temp_predictions)
            temp_parsed_pos = numpy.unravel_index(temp_pos, (15, 15))

            if (board[temp_parsed_pos] != 0):
                if (temp_high % 2 == 0):
                    res[checking_pos] += fine * gamma**(temp_high - 1)
                else:
                    res[checking_pos] -= bonus * gamma**(temp_high - 1)
                return

            board[temp_parsed_pos] = 1
            next_color = 'white'
            if color == 'white':
                next_color = 'black'
                board[temp_parsed_pos] = -1

            if (util.check(board, temp_parsed_pos)):
                if (temp_high % 2 == 0):
                    res[checking_pos] -= fine * gamma**(temp_high - 1)
                else:
                    res[checking_pos] += bonus * gamma**(temp_high - 1)
                return

            if (temp_high < max_high):
                rollout(board, temp_high + 1, next_color, checking_pos,
                        sum_logs + numpy.log(temp_predictions[temp_pos]))
            else:
                if logs[checking_pos]:
                    logs[checking_pos] = max(
                        logs[checking_pos],
                        sum_logs + numpy.log(temp_predictions[temp_pos]))
                else:
                    logs[checking_pos] = sum_logs + numpy.log(
                        temp_predictions[temp_pos])
            return

        i = 0
        while (time() - start < self._timeout and i < self._samples):
            i += 1
            pos = numpy.random.choice(225, p=predictions)
            parsed_pos = numpy.unravel_index(pos, (15, 15))
            n[pos] += 1

            if (board[parsed_pos] != 0):
                res[pos] -= 1
                continue

            board[parsed_pos] = 1
            color = 'white'
            if self._color == 'white':
                color = 'black'
                board[parsed_pos] = -1

            if (util.check(board, parsed_pos)):
                res[pos] += res.max() + 1000
                break

            rollout(numpy.copy(board), 1, color, pos,
                    numpy.log(predictions[pos]))
            board[parsed_pos] = 0

        # Cells never reached by a rollout score far below every visited one.
        logs[logs == 0.0] = logs.min() - 100.0

        # Weight the path log-scores by legality and the win/block bonus,
        # then normalise.
        value = logs * available * (1 + checker)
        value /= value.sum()

        if numpy.max(value) > 0.8:
            code_move = numpy.argmax(value)
        else:
            if (self._verbose):
                print('----!!!----')
            code_move = numpy.argmax((n + res) * available * (1 + checker))

        ans = numpy.zeros(225)
        ans[code_move] = 1

        if (self._verbose):
            for elem in numpy.where(res != 0)[0]:
                print(util.to_move(numpy.unravel_index(int(elem), (15, 15))),
                      'R:', res[int(elem)], 'V:', value[int(elem)], 'N:',
                      n[int(elem)])

            # code_move = numpy.argmax(values.reshape(1, 225))
            print(self._name + ':',
                  util.to_move([code_move // 15, code_move % 15]),
                  'working time:', time() - start, 'iterations:', i)

        return ans.reshape(1, 225)
Example #10
    def policy(self, game):
        self._start_time = time()
        # Infer our colour from the move parity if it was not set explicitly.
        if not self._color:
            stones = 225 - len(util.list_positions(game.board(), renju.Player.NONE))
            if stones % 2 == 1:
                self._color = 'white'
            else:
                self._color = 'black'
        # Fixed opening on an empty board.
        if len(util.list_positions(game.board(), renju.Player.NONE)) == 225:
            res = numpy.zeros((225, 1))
            res[112] = 1
            return res.reshape((1, 225))
        
        # Try to reuse the existing search tree by replaying the last two moves.
        done = True
        if (self._root and len(util.list_positions(game.board(), renju.Player.NONE)) < 224):
            to_do_moves = game._positions[-2:]
            for move in to_do_moves:
                if not self.make_move(move[0] * 15 + move[1]):
                    done = False
        else:
            done = False

        if self._verbose:
            print(done)

        checker = numpy.zeros(225)
        available = numpy.zeros(225)
        self._board = numpy.copy(-game.board())
        # Flag empty cells that give an immediate five for either colour.
        for parsed_pos in util.list_positions(self._board, renju.Player.NONE):
            pos = parsed_pos[0] * 15 + parsed_pos[1]
            parsed_pos = tuple(parsed_pos)

            self._board[parsed_pos] = 1
            if util.check(self._board, parsed_pos):
                checker[pos] += 1
            self._board[parsed_pos] = -1
            if util.check(self._board, parsed_pos):
                checker[pos] += 1
            self._board[parsed_pos] = 0
            available[pos] = 1
        
        if not done:
            self._root = Node(self._board, self._color, self._node_model_black, self._node_model_white)

        self.tree_search()

        # Mean reward per visit over sufficiently visited legal cells,
        # boosted for immediate wins/blocks.
        values = (self._root._N > self._iters / 5) * self._root._R / (1 + self._root._N) * available * (1 + 10 * checker)

        if numpy.max(values) > self._min_prob:
            code_move = numpy.argmax(values)
        else:
            # Fall back to the most visited cell.
            code_move = numpy.argmax(self._root._N * available * (1 + 10 * checker))

        res = numpy.zeros(225)
        res[code_move] = 1
        
        if self._verbose:
            for elem in numpy.where(self._root._R != 0)[0]:
                try:
                    print(util.to_move(numpy.unravel_index(int(elem), (15, 15))),
                          'R:', self._root._R[int(elem)], 'V:', values[int(elem)])
                except Exception:
                    print(numpy.where(self._root._R != 0))
                    continue

            print(self._name + ':', util.to_move([code_move // 15, code_move % 15]),
                  'working time:', time() - self._start_time, 'iterations:', self._iters)

        return res.reshape(1, 225)
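Example #10 depends on a Node class and a tree_search method that are not part of this listing; a minimal sketch of the interface the policy above reads (field names taken from the usage, everything else is an assumption):

import numpy

class Node:
    def __init__(self, board, color, model_black, model_white):
        self._board = board
        self._color = color
        self._models = (model_black, model_white)
        # Per-cell visit counts and accumulated rewards, indexed 0..224,
        # exactly as the policy above reads them via _N and _R.
        self._N = numpy.zeros(225)
        self._R = numpy.zeros(225)
        # tree_search() and make_move() would expand and update these
        # statistics; their internals are not shown in the listing.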