Example 1
    def test_create_network_with_2d_input(self):
        input_nodes = (5, 5)
        hidden_nodes = (50, 40, 30)
        input_layer, output_layer, variables = create_network(
            input_nodes, hidden_nodes)
        self.assertSequenceEqual(input_layer.get_shape().as_list(),
                                 [None, input_nodes[0], input_nodes[1]])
        self.assertSequenceEqual(output_layer.get_shape().as_list(),
                                 [None, input_nodes[0] * input_nodes[1]])
        self.assertEqual(len(variables), (len(hidden_nodes) + 1) * 2)
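create_network itself is not shown in these examples. Below is a minimal sketch of what it could look like, assuming TF1-style placeholders, dense ReLU hidden layers and a softmax output; the (input_layer, output_layer, variables) return tuple and the input_nodes / hidden_nodes / output_nodes parameters follow the tests and calls in this listing, everything else is an assumption rather than the project's actual implementation.

import operator
from functools import reduce

import tensorflow as tf


def create_network(input_nodes, hidden_nodes, output_nodes=None, output_softmax=True):
    # Sketch only: a fully connected TF1 graph returning the
    # (input_layer, output_layer, variables) tuple the tests expect.
    variables = []

    # Accept either a flat size (e.g. 20) or a 2D shape such as (5, 5).
    if isinstance(input_nodes, tuple):
        input_layer = tf.compat.v1.placeholder("float", (None,) + input_nodes)
        flat_size = reduce(operator.mul, input_nodes, 1)
        current_layer = tf.reshape(input_layer, (-1, flat_size))
    else:
        input_layer = tf.compat.v1.placeholder("float", (None, input_nodes))
        flat_size = input_nodes
        current_layer = input_layer

    if output_nodes is None:
        output_nodes = flat_size

    layer_sizes = (flat_size,) + tuple(hidden_nodes) + (output_nodes,)
    for i, (in_size, out_size) in enumerate(zip(layer_sizes, layer_sizes[1:])):
        weights = tf.Variable(tf.random.truncated_normal((in_size, out_size), stddev=0.01))
        bias = tf.Variable(tf.zeros((out_size,)))
        variables += [weights, bias]  # (len(hidden_nodes) + 1) * 2 variables in total
        current_layer = tf.matmul(current_layer, weights) + bias
        if i < len(hidden_nodes):  # hidden layers get a ReLU, the output layer does not
            current_layer = tf.nn.relu(current_layer)

    output_layer = tf.nn.softmax(current_layer) if output_softmax else current_layer
    return input_layer, output_layer, variables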
Example 2
    def test_load_variables_into_network_of_wrong_size_gives_friendly_exception(
            self):
        try:
            file_name = 'test.p'
            input_nodes = 20

            _, _, variables1 = create_network(input_nodes, (30, ))
            _, _, variables2 = create_network(input_nodes, (40, ))

            with tf.Session() as session:
                session.run(tf.global_variables_initializer())

                save_network(session, variables1, file_name)

                with self.assertRaises(ValueError):
                    load_network(session, variables2, file_name)
        finally:
            try:
                os.remove(file_name)
            except OSError:
                pass
Example 3
    def test_save_and_load_network(self):
        try:
            file_name = 'test.p'
            input_nodes = 20
            hidden_nodes = (50, 40, 30)
            _, _, variables1 = create_network(input_nodes, hidden_nodes)
            _, _, variables2 = create_network(input_nodes, hidden_nodes)

            with tf.Session() as session:
                session.run(tf.global_variables_initializer())

                save_network(session, variables1, file_name)
                load_network(session, variables2, file_name)

                for var1, var2 in zip(variables1, variables2):
                    np.testing.assert_array_almost_equal(
                        session.run(var1), session.run(var2))
        finally:
            try:
                os.remove(file_name)
            except OSError:
                pass
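save_network and load_network are not part of these examples either. Given the '.p' file names and the ValueError the previous test expects on a size mismatch, a plausible pickle-based sketch (an assumption, not the original code) is:

import pickle

import tensorflow as tf


def save_network(session, variables, file_path):
    # Sketch only: evaluate every variable and pickle the list of numpy arrays.
    with open(file_path, mode='wb') as f:
        pickle.dump(session.run(variables), f)


def load_network(session, variables, file_path):
    # Sketch only: unpickle the arrays and assign them to the given variables,
    # raising a friendly ValueError if the saved shapes do not match.
    with open(file_path, mode='rb') as f:
        values = pickle.load(f)

    if len(values) != len(variables):
        raise ValueError("Saved network has %d variables, expected %d"
                         % (len(values), len(variables)))

    for value, variable in zip(values, variables):
        if list(value.shape) != variable.get_shape().as_list():
            raise ValueError(
                "Saved variable of shape %s does not fit a network variable of shape %s; "
                "was the network created with different hidden_nodes?"
                % (value.shape, variable.get_shape().as_list()))
        session.run(tf.compat.v1.assign(variable, value))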
Example 4
def load_network_player(network_filename, hidden_layers):
    session = tf.Session()
    input_layer, output_layer, variables = network_helpers.create_network(
        game_spec.board_squares(), hidden_layers)
    network_helpers.load_network(session, variables, network_filename)

    def network_player(board_state, side):
        print()
        print("Network player (%s)" % side)
        tic_tac_toe.print_game_state(board_state)

        move_probs = network_helpers.get_stochastic_network_move(
            session, input_layer, output_layer, board_state, side, log=True)
        move = game_spec.flat_move_to_tuple(move_probs.argmax())

        print "Network move:", move
        return move
    return network_player
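A hedged usage sketch for the player factory above: the file name, the hidden layer sizes and the random_player opponent are illustrative assumptions, while game_spec.play_game mirrors how games are run in the other examples.

# Hypothetical usage only; names not defined here are assumed from the module.
network_player = load_network_player('current_network.p', (100, 100, 100))
result = game_spec.play_game(network_player, random_player)
print("Result for the network player:", result)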
Example 5
def predict_best_move_low_level(game_spec, create_network, network_file_path,
                                player, board_state):
    """Make a predicition for the next move at a given state using some lower level parameters

    Args:
        create_network (->(input_layer : tf.placeholder, output_layer : tf.placeholder, variables : [tf.Variable])):
            Method that creates the network we will train.
        network_file_path (str): path to the file with weights we want to load for this network
        game_spec (games.base_game_spec.BaseGameSpec): The game we are playing
        player: The player to make the move 1 or -1
        board_state: The state of the board at some time during the game

    Returns:
        a vector of zeros with a 1 on the position which represents the best move to be taken
    """
    reward_placeholder = tf.placeholder("float", shape=(None, ))
    actual_move_placeholder = tf.placeholder("float",
                                             shape=(None, game_spec.outputs()))

    input_layer, output_layer, variables = create_network()

    policy_gradient = tf.log(
        tf.reduce_sum(tf.multiply(actual_move_placeholder, output_layer),
                      axis=1)) * reward_placeholder

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())

        if network_file_path and os.path.isfile(network_file_path):
            print("Loading trained network from ", network_file_path)
            load_network(session, variables, network_file_path)
        else:
            print("File with trained network can't be loaded. Exiting...'")
            return

        return get_stochastic_network_move(session, input_layer, output_layer,
                                           board_state, player)
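A hedged example of how predict_best_move_low_level might be called. Because the function invokes create_network() with no arguments, the factory has to be pre-bound, for example with functools.partial; the file path, the layer sizes and the starting board_state are illustrative assumptions.

import functools

# Hypothetical usage only; game_spec and board_state are assumed to exist
# as in the surrounding examples, and the path and layer sizes are made up.
network_factory = functools.partial(create_network,
                                    game_spec.board_squares(), (100, 100, 100))
move_vector = predict_best_move_low_level(game_spec, network_factory,
                                          'current_network.p', player=1,
                                          board_state=board_state)
move = game_spec.flat_move_to_tuple(move_vector.argmax())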
Example 6
def train_value_network(game_spec,
                        hidden_nodes_reinforcement,
                        reinforcement_network_file_path,
                        hidden_nodes_value,
                        value_network_file_path,
                        learn_rate=1e-4,
                        batch_size=100,
                        train_samples=10000,
                        test_samples=8000):
    reinforcement_input_layer, reinforcement_output_layer, reinforcement_variables = create_network(
        game_spec.board_squares(), hidden_nodes_reinforcement,
        game_spec.outputs())

    value_input_layer, value_output_layer, value_variables = create_network(
        game_spec.board_squares(),
        hidden_nodes_value,
        output_nodes=1,
        output_softmax=False)

    target_placeholder = tf.compat.v1.placeholder("float", (None, 1))
    error = tf.reduce_sum(input_tensor=tf.square(target_placeholder -
                                                 value_output_layer))

    train_step = tf.compat.v1.train.RMSPropOptimizer(learn_rate).minimize(
        error)

    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())

        load_network(session, reinforcement_variables,
                     reinforcement_network_file_path)

        if os.path.isfile(value_network_file_path):
            print("loading previous version of value network")
            load_network(session, value_variables, value_network_file_path)

        def make_move(board_state, side):
            move = get_deterministic_network_move(session,
                                                  reinforcement_input_layer,
                                                  reinforcement_output_layer,
                                                  board_state, side)

            return game_spec.flat_move_to_tuple(np.argmax(move))

        board_states_training = {}
        board_states_test = []
        episode_number = 0

        while len(board_states_training) < train_samples + test_samples:
            board_state = _generate_random_board_position(
                game_spec, (1, game_spec.board_squares() * 0.8))
            board_state_flat = tuple(np.ravel(board_state))

            # only accept the board_state if not already in the dict
            if board_state_flat not in board_states_training:
                result = game_spec.play_game(make_move,
                                             make_move,
                                             board_state=board_state)
                board_states_training[board_state_flat] = float(result)

        # take a random selection from training into a test set
        for _ in range(test_samples):
            sample = random.choice(list(board_states_training.keys()))
            board_states_test.append((sample, board_states_training[sample]))
            del board_states_training[sample]

        board_states_training = list(board_states_training.items())

        test_error = session.run(error,
                                 feed_dict={
                                     value_input_layer:
                                     [x[0] for x in board_states_test],
                                     target_placeholder:
                                     [[x[1]] for x in board_states_test]
                                 })

        while True:
            np.random.shuffle(board_states_training)
            train_error = 0

            for start_index in range(
                    0,
                    len(board_states_training) - batch_size + 1, batch_size):
                mini_batch = board_states_training[start_index:start_index +
                                                   batch_size]

                batch_error, _ = session.run(
                    [error, train_step],
                    feed_dict={
                        value_input_layer: [x[0] for x in mini_batch],
                        target_placeholder: [[x[1]] for x in mini_batch]
                    })
                train_error += batch_error

            new_test_error = session.run(error,
                                         feed_dict={
                                             value_input_layer:
                                             [x[0] for x in board_states_test],
                                             target_placeholder:
                                             [[x[1]]
                                              for x in board_states_test]
                                         })

            print("episode: %s train_error: %s test_error: %s" %
                  (episode_number, train_error, test_error))

            if new_test_error > test_error:
                print("train error went up, stopping training")
                break

            test_error = new_test_error
            episode_number += 1

        save_network(session, value_variables, value_network_file_path)
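A hedged example call for the training routine above; the file paths and hidden layer sizes are illustrative, and game_spec is assumed to be available as in the other examples.

# Hypothetical invocation only; all paths and sizes are made up.
train_value_network(game_spec,
                    hidden_nodes_reinforcement=(100, 100, 100),
                    reinforcement_network_file_path='current_network.p',
                    hidden_nodes_value=(100, 100, 100),
                    value_network_file_path='value_network.p',
                    learn_rate=1e-4,
                    batch_size=100)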
Example 7
        number_moves = random.randint(*NUMBER_RANDOM_RANGE)
        side = 1
        for _ in range(number_moves):
            board_state = game_spec.apply_move(
                board_state,
                random.choice(list(game_spec.available_moves(board_state))),
                side)
            if game_spec.has_winner(board_state) != 0:
                # start again if we hit an already winning position
                continue

            side = -side
        return board_state


reinforcement_input_layer, reinforcement_output_layer, reinforcement_variables = create_network(
    game_spec.board_squares(), HIDDEN_NODES_REINFORCEMENT, game_spec.outputs())

value_input_layer, value_output_layer, value_variables = create_network(
    game_spec.board_squares(),
    HIDDEN_NODES_VALUE,
    output_nodes=1,
    output_softmax=False)

target_placeholder = tf.placeholder("float", (None, 1))
error = tf.reduce_sum(tf.square(target_placeholder - value_output_layer))

train_step = tf.train.RMSPropOptimizer(LEARN_RATE).minimize(error)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
Example 8
save_network_file_path = config['save_network_file_path']
number_of_historic_networks = 1
historic_network_base_path = config['historic_network_base_path']
number_of_games = config['number_of_games']
update_opponent_winrate = config['update_opponent_winrate']
print_results_every = config['print_results_every']
learn_rate = config['learn_rate']
batch_size = config['batch_size']
cnn_on = config['cnn_on']
eps = config['eps']
deterministic = config['deterministic']
mcts = config['mcts']
min_win_ticks = config['min_win_ticks']
beta = config['beta']

input_layer, output_layer, variables, weights = create_network()

current_historical_index = 0
historical_networks = []

f = []
for (dirpath, dirnames, filenames) in os.walk(netloc):
    f.extend(filenames)
    break

netlist_hist = []
raw_netlist = []
for file in f:
    p = re.compile(r'net_ep\d+_.+\.p')
    if 'config' in file:
        pass
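The last snippet is cut off here. As a hedged continuation of the file-scan idea, one way the compiled pattern could be used to collect and order the historic network files (an assumption, not the original code):

# Hypothetical continuation: relies on f and re from the snippet above; keep
# only files matching the pattern and sort them by the episode number embedded
# in the name, oldest first.
net_pattern = re.compile(r'net_ep(\d+)_.+\.p')
netlist_hist = sorted((int(m.group(1)), m.group(0))
                      for m in (net_pattern.match(name) for name in f) if m)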