Example #1
def _monte_carlo_sample(game_spec, board_state, side, policy=False, session=None, input_layer=None,
                        output_layer=None, valid_only=False, cnn_on=False):
    """Sample a single rollout from the current board_state and side. Moves are made to the current board_state until we
     reach a terminal state then the result and the first move made to get there is returned.

    Args:
        game_spec (BaseGameSpec): The specification for the game we are evaluating
        board_state (3x3 tuple of int): state of the board
        side (int): side currently to play. +1 for the plus player, -1 for the minus player

    Returns:
        (result(int), move(int,int)): The result from this rollout, +1 for a win for the plus player -1 for a win for
            the minus player, 0 for a draw
    """
    result = game_spec.has_winner(board_state)
    if result is not None:
        return result, None

    moves = list(game_spec.available_moves(board_state))
    if not moves:
        return 0, None

    # select a move: sample from the policy network if requested, otherwise pick uniformly at random
    if policy:
        # get_stochastic_network_move returns a one-hot array (81 elements for a 9x9 board)
        # rather than a move tuple, so convert it back to a tuple here
        move = get_stochastic_network_move(session, input_layer, output_layer, board_state, side,
                                           valid_only, game_spec, cnn_on)
        move = game_spec.flat_move_to_tuple(move.argmax())
    else:
        move = random.choice(moves)
    result, _ = _monte_carlo_sample(game_spec, game_spec.apply_move(board_state, move, side), -side, policy,
                                    session, input_layer, output_layer, valid_only, cnn_on)
    return result, move
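
A single call above yields only one rollout. Here is a minimal sketch of how such rollouts could be aggregated into a move choice, assuming only the game_spec methods already used above (the helper name and the samples_per_move parameter are hypothetical, not part of this codebase):

import collections

def evaluate_moves_by_rollout(game_spec, board_state, side, samples_per_move=50):
    # Hypothetical helper: average the terminal result of many random rollouts
    # for each available first move, scored from `side`'s perspective.
    totals = collections.defaultdict(float)
    for first_move in game_spec.available_moves(board_state):
        next_state = game_spec.apply_move(board_state, first_move, side)
        for _ in range(samples_per_move):
            result, _ = _monte_carlo_sample(game_spec, next_state, -side)
            totals[first_move] += result * side
    # Return the move whose rollouts scored best for the current side.
    return max(totals, key=totals.get)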

def make_move_historical(historical_network_index, board_state, side):
    net = historical_networks[historical_network_index]
    move = get_stochastic_network_move(session,
                                       net[0],
                                       net[1],
                                       board_state,
                                       side,
                                       valid_only=True,
                                       game_spec=game_spec)
    return game_spec.flat_move_to_tuple(move.argmax())

def make_training_move(board_state, side):
    mini_batch_board_states.append(np.ravel(board_state) * side)
    move = get_stochastic_network_move(session,
                                       input_layer,
                                       output_layer,
                                       board_state,
                                       side,
                                       valid_only=True,
                                       game_spec=game_spec)
    mini_batch_moves.append(move)
    return game_spec.flat_move_to_tuple(move.argmax())
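
make_training_move only records each (state, sampled move) pair; the lists are consumed later, once the game's outcome is known. A hedged sketch of that consumption as a plain REINFORCE update, assuming placeholders and a train_step like those built elsewhere in this codebase (the exact names here are assumptions):

import numpy as np

def apply_reinforce_update(session, train_step, states_placeholder,
                           moves_placeholder, rewards_placeholder, reward):
    # Hypothetical update: every move in the finished game shares the final
    # reward (+1 win, -1 loss, 0 draw), as in basic REINFORCE.
    rewards = [reward] * len(mini_batch_board_states)
    session.run(train_step, feed_dict={
        states_placeholder: np.array(mini_batch_board_states),
        moves_placeholder: np.array(mini_batch_moves),
        rewards_placeholder: rewards,
    })
    del mini_batch_board_states[:]
    del mini_batch_moves[:]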
Example #4
    def network_player(board_state, side):
        print()
        print("Network player (%s)" % side)
        tic_tac_toe.print_game_state(board_state)

        move_probs = network_helpers.get_stochastic_network_move(
            session, input_layer, output_layer, board_state, side, log=True)
        move = game_spec.flat_move_to_tuple(move_probs.argmax())

        print("Network move:", move)
        return move
    def make_training_move(board_state, side):
        if cnn_on:
            # The CNN expects each 3x3 sub-board to occupy nine consecutive entries of the
            # flat input: the first sub-board fills entries 0-8, the second entries 9-17, etc.
            # If the 10 split 3x3 boards are desired, create_3x3_board_states(board_state) provides them.
            np_board_state = create_3x3_board_states(board_state)
        else:
            np_board_state = np.array(board_state)
        np_board_state[np_board_state > 1] = 0
        # All states used in the mini-batch are stored multiplied by side
        # (+ and - indicate which player's state it was).
        mini_batch_board_states.append(np_board_state * side)

        rand_numb = random.uniform(0., 1.)
        if rand_numb < eps:
            move = get_random_network_move(board_state, game_spec)
        elif deterministic:
            move = get_deterministic_network_move(session,
                                                  input_layer,
                                                  output_layer,
                                                  board_state,
                                                  side,
                                                  valid_only=True,
                                                  game_spec=game_spec,
                                                  cnn_on=cnn_on)
        else:
            if mcts:
                _, move = monte_carlo_tree_search(game_spec, board_state,
                                                  side, 27, session,
                                                  input_layer, output_layer,
                                                  True, cnn_on, True)
            else:
                move = get_stochastic_network_move(session,
                                                   input_layer,
                                                   output_layer,
                                                   board_state,
                                                   side,
                                                   valid_only=True,
                                                   game_spec=game_spec,
                                                   cnn_on=cnn_on)
        # The move returned to the game uses a different layout than the move stored
        # for CNN training, so keep an unmodified copy before reordering.
        move_for_game = np.asarray(move)
        if cnn_on:
            # The mini-batch states are saved in the layout the network consumes,
            # so the mini-batch moves must be reordered the same way.
            move = create_3x3_board_states(np.reshape(move, [9, 9]))  # the helper expects a 9x9 array
            mini_batch_moves.append(move[0:81])
        else:
            mini_batch_moves.append(move)
        return game_spec.flat_move_to_tuple(move_for_game.argmax())
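
create_3x3_board_states itself is not shown in these examples. A plausible reconstruction of the reordering the comments above describe, for a 9x9 Ultimate Tic-Tac-Toe board stored row-major (an assumption; the real helper may also append the extra summary boards mentioned earlier):

import numpy as np

def split_into_3x3_blocks(board_state):
    # Reorder a 9x9 board so each 3x3 sub-board occupies nine consecutive
    # entries: 0-8 hold the top-left sub-board, 9-17 the top-middle, and so on.
    board = np.asarray(board_state).reshape(9, 9)
    blocks = [board[r:r + 3, c:c + 3].ravel()
              for r in range(0, 9, 3)
              for c in range(0, 9, 3)]
    return np.concatenate(blocks)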

    def make_training_move(board_state, side):
        if cnn_on:
            np_board_state = create_3x3_board_states(board_state)
        else:
            np_board_state = np.array(board_state)

        mini_batch_board_states.append(np_board_state * side)

        rand_numb = random.uniform(0., 1.)
        if rand_numb < eps:
            move = get_random_network_move(board_state, game_spec)
        elif deterministic:
            move = get_deterministic_network_move(session,
                                                  input_layer,
                                                  output_layer,
                                                  board_state,
                                                  side,
                                                  valid_only=True,
                                                  game_spec=game_spec,
                                                  cnn_on=cnn_on)
        else:
            if mcts:
                _, move = monte_carlo_tree_search(game_spec, board_state,
                                                  side, 27, session,
                                                  input_layer, output_layer,
                                                  True, cnn_on, True)
            else:
                move = get_stochastic_network_move(session,
                                                   input_layer,
                                                   output_layer,
                                                   board_state,
                                                   side,
                                                   valid_only=True,
                                                   game_spec=game_spec,
                                                   cnn_on=cnn_on)

        # Keep an unmodified copy for the game; the stored training move may be reordered below.
        move_for_game = np.asarray(move)
        if cnn_on:
            # The mini-batch moves must use the same layout as the mini-batch states.
            move = create_3x3_board_states(np.reshape(move, [9, 9]))  # the helper expects a 9x9 array
            mini_batch_moves.append(move[0:81])
        else:
            mini_batch_moves.append(move)
        return game_spec.flat_move_to_tuple(move_for_game.argmax())
Example #7
    def make_move_historical(net, board_state, side):
        if mcts:
            _, move = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                              session, input_layer,
                                              output_layer, True, cnn_on, True)
        else:
            # move = get_deterministic_network_move(session, net[0], net[1], board_state, side,
            #                                       valid_only=True, game_spec=game_spec, cnn_on=cnn_on)
            move = get_stochastic_network_move(session,
                                               net[0],
                                               net[1],
                                               board_state,
                                               side,
                                               valid_only=True,
                                               game_spec=game_spec,
                                               cnn_on=cnn_on)

        # monte_carlo_tree_search returns a plain move rather than an array, so
        # convert before calling argmax.
        move_for_game = np.asarray(move)
        return game_spec.flat_move_to_tuple(move_for_game.argmax())
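
A sketch of how make_move_historical could be used to score the current network against an earlier checkpoint, assuming game_spec offers a play_game(plus_player, minus_player) method returning +1/-1/0 (an assumption here, as is the helper name):

import functools

def score_against_historical(game_spec, net, games=100):
    # Hypothetical evaluation loop: the training network plays the plus side,
    # the historical checkpoint plays the minus side.
    wins = 0
    for _ in range(games):
        historical_player = functools.partial(make_move_historical, net)
        if game_spec.play_game(make_training_move, historical_player) == 1:
            wins += 1
    return wins / float(games)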
def predict_best_move_low_level(game_spec, create_network, network_file_path,
                                player, board_state):
    """Make a predicition for the next move at a given state using some lower level parameters

    Args:
        create_network (->(input_layer : tf.placeholder, output_layer : tf.placeholder, variables : [tf.Variable])):
            Method that creates the network we will train.
        network_file_path (str): path to the file with weights we want to load for this network
        game_spec (games.base_game_spec.BaseGameSpec): The game we are playing
        player (int): The player to make the move, +1 or -1
        board_state: The state of the board at some point during the game

    Returns:
        a vector of zeros with a 1 on the position which represents the best move to be taken
    """
    reward_placeholder = tf.placeholder("float", shape=(None, ))
    actual_move_placeholder = tf.placeholder("float",
                                             shape=(None, game_spec.outputs()))

    input_layer, output_layer, variables = create_network()

    # This mirrors the training loss but is not actually needed just to sample a move.
    policy_gradient = tf.log(
        tf.reduce_sum(tf.multiply(actual_move_placeholder, output_layer),
                      axis=1)) * reward_placeholder

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())

        if network_file_path and os.path.isfile(network_file_path):
            print("Loading trained network from ", network_file_path)
            load_network(session, variables, network_file_path)
        else:
            print("File with trained network can't be loaded. Exiting...'")
            return

        return get_stochastic_network_move(session, input_layer, output_layer,
                                           board_state, player)
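
A hedged usage example, with game_spec and create_network assumed to be defined as in the surrounding examples and an illustrative weights-file path (the returned vector is one-hot, so argmax recovers the move):

move_probs = predict_best_move_low_level(game_spec,
                                         create_network,
                                         'current_network.p',
                                         player=1,
                                         board_state=game_spec.new_board())
if move_probs is not None:
    print(game_spec.flat_move_to_tuple(move_probs.argmax()))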

def make_training_move(board_state, side):
    mini_batch_board_states.append(np.ravel(board_state) * side)
    move = get_stochastic_network_move(session, input_layer, output_layer, board_state, side)
    mini_batch_moves.append(move)
    return game_spec.flat_move_to_tuple(move.argmax())
Example #10
import collections