def make_move(board_state, side):
    """Return the reinforcement network's greedy move for *side* as a game tuple."""
    # Query the network deterministically (no sampling, no validity mask here).
    scores = get_deterministic_network_move(
        session, reinforcement_input_layer, reinforcement_output_layer,
        board_state, side)
    # Highest-scoring flat index, translated back to the game's move tuple.
    best_flat_index = np.argmax(scores)
    return game_spec.flat_move_to_tuple(best_flat_index)
def make_move(board_state, side):
    """Return the network's highest-scoring *valid* move for *side* as a game tuple."""
    # valid_only=True masks out illegal moves before the argmax is taken.
    move_scores = get_deterministic_network_move(
        session, input_layer, output_layer, board_state, side,
        valid_only=True, game_spec=game_spec)
    return game_spec.flat_move_to_tuple(move_scores.argmax())
def make_move_historical(historical_network_index, board_state, side):
    """Play the greedy valid move chosen by a stored historical network.

    Args:
        historical_network_index: index into ``historical_networks`` selecting
            which (input_layer, output_layer) snapshot to query.
        board_state: current board in the game_spec representation.
        side: the player to move (+1 or -1).

    Returns:
        The chosen move as a tuple via ``game_spec.flat_move_to_tuple``.
    """
    # Fix: parameter was misspelled "histoical_network_index"; renamed to match
    # the correctly spelled sibling definition of make_move_historical.
    net = historical_networks[historical_network_index]
    move = get_deterministic_network_move(session, net[0], net[1],
                                          board_state, side,
                                          valid_only=True, game_spec=game_spec)
    return game_spec.flat_move_to_tuple(move.argmax())
def make_training_move(board_state, side):
    """Select a training move for *side* and record the (state, move) pair.

    Appends the network-formatted board state to ``mini_batch_board_states``
    and the chosen move vector to ``mini_batch_moves`` (both captured from the
    enclosing scope), then returns the move as a game tuple.  Selection is
    epsilon-greedy: random with probability ``eps``, otherwise deterministic,
    MCTS-guided, or stochastic depending on the ``deterministic``/``mcts``
    flags from the enclosing scope.
    """
    if cnn_on:
        # We must have the first 3x3 board as the first 9 entries of the list,
        # the second 3x3 board as the next 9 entries, etc.  This is required
        # for the CNN: it takes the first 9 entries and forms a 3x3 board, etc.
        """If the 10 split 3x3 boards are desired, use create_3x3_board_states(board_state) here"""
        np_board_state = create_3x3_board_states(board_state)
    else:
        np_board_state = np.array(board_state)
        # Zero any entries greater than 1 so the network sees only -1/0/+1
        # cell values.  NOTE(review): this statement's nesting inside the
        # else-branch was reconstructed from a flattened source — confirm.
        np_board_state[np_board_state > 1] = 0
    mini_batch_board_states.append(
        np_board_state * side
    )  # append all states used in the minibatch (+ and - determine which player's state it was)
    rand_numb = random.uniform(0., 1.)
    if rand_numb < eps:
        # Exploration: uniformly random move in network format.
        move = get_random_network_move(board_state, game_spec)
    elif deterministic:
        move = get_deterministic_network_move(session, input_layer, output_layer,
                                              board_state, side,
                                              valid_only=True, game_spec=game_spec,
                                              cnn_on=cnn_on)
    else:
        if mcts:
            _, move = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                              session, input_layer, output_layer,
                                              True, cnn_on, True)
        else:
            move = get_stochastic_network_move(session, input_layer, output_layer,
                                               board_state, side,
                                               valid_only=True, game_spec=game_spec,
                                               cnn_on=cnn_on)
    move_for_game = np.asarray(
        move
    )  # The move returned to the game is in a different configuration than the CNN learn move
    if cnn_on:
        # Since the mini batch states are saved the same way they enter the
        # neural net (the adapted board state), the same must happen for the
        # mini batch moves.
        move = create_3x3_board_states(np.reshape(
            move, [9, 9]))  # The function requires a 9x9 array
        mini_batch_moves.append(move[0:81])
    else:
        mini_batch_moves.append(move)
    return game_spec.flat_move_to_tuple(move_for_game.argmax())
def make_training_move(board_state, side):
    """Pick a training move for *side*, recording the (state, move) pair in
    the mini-batch buffers, and return the move as a game tuple.

    Selection is epsilon-greedy using the ``eps``/``deterministic``/``mcts``
    flags captured from the enclosing scope.
    """
    # Store the board in the layout the network consumes; multiplying by
    # ``side`` encodes which player the recorded state belongs to.
    if cnn_on:
        state_for_net = create_3x3_board_states(board_state)
    else:
        state_for_net = np.array(board_state)
    mini_batch_board_states.append(state_for_net * side)

    # Epsilon-greedy move selection.
    if random.uniform(0., 1.) < eps:
        move = get_random_network_move(board_state, game_spec)
    elif deterministic:
        move = get_deterministic_network_move(
            session, input_layer, output_layer, board_state, side,
            valid_only=True, game_spec=game_spec, cnn_on=cnn_on)
    elif mcts:
        _, move = monte_carlo_tree_search(
            game_spec, board_state, side, 27, session, input_layer,
            output_layer, True, cnn_on, True)
    else:
        move = get_stochastic_network_move(
            session, input_layer, output_layer, board_state, side,
            valid_only=True, game_spec=game_spec, cnn_on=cnn_on)

    # The move handed back to the game keeps the original flat layout, which
    # differs from the CNN-adapted layout stored in the mini batch.
    move_for_game = np.asarray(move)

    if cnn_on:
        # Mirror the CNN board-state adaptation for the recorded move:
        # split the 9x9 move grid the same way the states were split.
        adapted_move = create_3x3_board_states(np.reshape(move, [9, 9]))
        mini_batch_moves.append(adapted_move[0:81])
    else:
        mini_batch_moves.append(move)
    return game_spec.flat_move_to_tuple(move_for_game.argmax())
def player_func(board_state, side):
    """Move-selection callback: MCTS-guided when ``mcts`` is set, otherwise
    the deterministic (valid-only) network move; returns a game tuple."""
    if mcts:
        _, chosen = monte_carlo_tree_search(
            game_spec, board_state, side, 27, session, input_layer,
            output_layer, True, cnn_on, True)
    else:
        chosen = get_deterministic_network_move(
            session, input_layer, output_layer, board_state, side,
            valid_only=True, game_spec=game_spec)
    # MCTS may return a plain sequence; normalise to an ndarray before argmax.
    chosen_array = np.asarray(chosen)
    return game_spec.flat_move_to_tuple(chosen_array.argmax())
def make_move_historical(historical_network_index, board_state, side):
    """Choose a move for *side* using the stored historical network at
    ``historical_network_index``, or via MCTS when the flag is enabled;
    returns the move as a game tuple."""
    snapshot = historical_networks[historical_network_index]
    if mcts:
        _, picked = monte_carlo_tree_search(
            game_spec, board_state, side, 27, session, input_layer,
            output_layer, True, cnn_on, True)
    else:
        picked = get_deterministic_network_move(
            session, snapshot[0], snapshot[1], board_state, side,
            valid_only=True, game_spec=game_spec, cnn_on=cnn_on)
    # The move must be an array for argmax; MCTS does not return one.
    picked_array = np.asarray(picked)
    return game_spec.flat_move_to_tuple(picked_array.argmax())