def make_move(board_state, side):
    """Return the reinforcement network's greedy move for *side* as a game tuple."""
    # Query the network deterministically (no sampling, no validity mask here).
    scores = get_deterministic_network_move(
        session, reinforcement_input_layer, reinforcement_output_layer,
        board_state, side)
    # Highest-scoring flat index, translated back to the game's move tuple.
    best_flat_index = np.argmax(scores)
    return game_spec.flat_move_to_tuple(best_flat_index)
def make_move(board_state, side):
    """Return the network's highest-scoring *valid* move for *side* as a game tuple."""
    # valid_only=True masks out illegal moves before the argmax is taken.
    move_scores = get_deterministic_network_move(
        session, input_layer, output_layer, board_state, side,
        valid_only=True, game_spec=game_spec)
    return game_spec.flat_move_to_tuple(move_scores.argmax())
def make_move_historical(historical_network_index, board_state, side):
    """Play the greedy valid move chosen by a stored historical network.

    Args:
        historical_network_index: index into ``historical_networks`` selecting
            which (input_layer, output_layer) snapshot to query.
        board_state: current board in the game_spec representation.
        side: the player to move (+1 or -1).

    Returns:
        The chosen move as a tuple via ``game_spec.flat_move_to_tuple``.
    """
    # Fix: parameter was misspelled "histoical_network_index"; renamed to match
    # the correctly spelled sibling definition of make_move_historical.
    net = historical_networks[historical_network_index]
    move = get_deterministic_network_move(session, net[0], net[1],
                                          board_state, side,
                                          valid_only=True, game_spec=game_spec)
    return game_spec.flat_move_to_tuple(move.argmax())
def make_training_move(board_state, side):
    """Select a training move for *side* and record the (state, move) pair.

    Appends the network-formatted board state to ``mini_batch_board_states``
    and the chosen move vector to ``mini_batch_moves`` (both captured from the
    enclosing scope), then returns the move as a game tuple.  Selection is
    epsilon-greedy: random with probability ``eps``, otherwise deterministic,
    MCTS-guided, or stochastic depending on the ``deterministic``/``mcts``
    flags from the enclosing scope.
    """
    if cnn_on:
        # We must have the first 3x3 board as the first 9 entries of the list,
        # the second 3x3 board as the next 9 entries, etc.  This is required
        # for the CNN: it takes the first 9 entries and forms a 3x3 board, etc.
        """If the 10 split 3x3 boards are desired, use create_3x3_board_states(board_state) here"""
        np_board_state = create_3x3_board_states(board_state)
    else:
        np_board_state = np.array(board_state)
        # Zero any entries greater than 1 so the network sees only -1/0/+1
        # cell values.  NOTE(review): this statement's nesting inside the
        # else-branch was reconstructed from a flattened source — confirm.
        np_board_state[np_board_state > 1] = 0
    mini_batch_board_states.append(
        np_board_state * side
    )  # append all states used in the minibatch (+ and - determine which player's state it was)
    rand_numb = random.uniform(0., 1.)
    if rand_numb < eps:
        # Exploration: uniformly random move in network format.
        move = get_random_network_move(board_state, game_spec)
    elif deterministic:
        move = get_deterministic_network_move(session, input_layer, output_layer,
                                              board_state, side,
                                              valid_only=True, game_spec=game_spec,
                                              cnn_on=cnn_on)
    else:
        if mcts:
            _, move = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                              session, input_layer, output_layer,
                                              True, cnn_on, True)
        else:
            move = get_stochastic_network_move(session, input_layer, output_layer,
                                               board_state, side,
                                               valid_only=True, game_spec=game_spec,
                                               cnn_on=cnn_on)
    move_for_game = np.asarray(
        move
    )  # The move returned to the game is in a different configuration than the CNN learn move
    if cnn_on:
        # Since the mini batch states are saved the same way they enter the
        # neural net (the adapted board state), the same must happen for the
        # mini batch moves.
        move = create_3x3_board_states(np.reshape(
            move, [9, 9]))  # The function requires a 9x9 array
        mini_batch_moves.append(move[0:81])
    else:
        mini_batch_moves.append(move)
    return game_spec.flat_move_to_tuple(move_for_game.argmax())
def make_training_move(board_state, side):
    """Pick a training move for *side*, recording the (state, move) pair in
    the mini-batch buffers, and return the move as a game tuple.

    Selection is epsilon-greedy using the ``eps``/``deterministic``/``mcts``
    flags captured from the enclosing scope.
    """
    # Store the board in the layout the network consumes; multiplying by
    # ``side`` encodes which player the recorded state belongs to.
    if cnn_on:
        state_for_net = create_3x3_board_states(board_state)
    else:
        state_for_net = np.array(board_state)
    mini_batch_board_states.append(state_for_net * side)

    # Epsilon-greedy move selection.
    if random.uniform(0., 1.) < eps:
        move = get_random_network_move(board_state, game_spec)
    elif deterministic:
        move = get_deterministic_network_move(
            session, input_layer, output_layer, board_state, side,
            valid_only=True, game_spec=game_spec, cnn_on=cnn_on)
    elif mcts:
        _, move = monte_carlo_tree_search(
            game_spec, board_state, side, 27, session, input_layer,
            output_layer, True, cnn_on, True)
    else:
        move = get_stochastic_network_move(
            session, input_layer, output_layer, board_state, side,
            valid_only=True, game_spec=game_spec, cnn_on=cnn_on)

    # The move handed back to the game keeps the original flat layout, which
    # differs from the CNN-adapted layout stored in the mini batch.
    move_for_game = np.asarray(move)

    if cnn_on:
        # Mirror the CNN board-state adaptation for the recorded move:
        # split the 9x9 move grid the same way the states were split.
        adapted_move = create_3x3_board_states(np.reshape(move, [9, 9]))
        mini_batch_moves.append(adapted_move[0:81])
    else:
        mini_batch_moves.append(move)
    return game_spec.flat_move_to_tuple(move_for_game.argmax())
def player_func(board_state, side):
    """Move-selection callback: MCTS-guided when ``mcts`` is set, otherwise
    the deterministic (valid-only) network move; returns a game tuple."""
    if mcts:
        _, chosen = monte_carlo_tree_search(
            game_spec, board_state, side, 27, session, input_layer,
            output_layer, True, cnn_on, True)
    else:
        chosen = get_deterministic_network_move(
            session, input_layer, output_layer, board_state, side,
            valid_only=True, game_spec=game_spec)
    # MCTS may return a plain sequence; normalise to an ndarray before argmax.
    chosen_array = np.asarray(chosen)
    return game_spec.flat_move_to_tuple(chosen_array.argmax())
def make_move_historical(historical_network_index, board_state, side):
    """Choose a move for *side* using the stored historical network at
    ``historical_network_index``, or via MCTS when the flag is enabled;
    returns the move as a game tuple."""
    snapshot = historical_networks[historical_network_index]
    if mcts:
        _, picked = monte_carlo_tree_search(
            game_spec, board_state, side, 27, session, input_layer,
            output_layer, True, cnn_on, True)
    else:
        picked = get_deterministic_network_move(
            session, snapshot[0], snapshot[1], board_state, side,
            valid_only=True, game_spec=game_spec, cnn_on=cnn_on)
    # The move must be an array for argmax; MCTS does not return one.
    picked_array = np.asarray(picked)
    return game_spec.flat_move_to_tuple(picked_array.argmax())