def test_create_network_with_2d_input(self):
    """A 2-D input shape gives a matching input layer and a flattened output."""
    board_shape = (5, 5)
    layer_sizes = (50, 40, 30)

    net_input, net_output, net_variables = create_network(board_shape,
                                                          layer_sizes)

    rows, cols = board_shape
    # The leading dimension of both layers is left unspecified (None).
    self.assertSequenceEqual(net_input.get_shape().as_list(),
                             [None, rows, cols])
    self.assertSequenceEqual(net_output.get_shape().as_list(),
                             [None, rows * cols])

    # Expected count is two variables for every hidden layer plus one more
    # layer -- presumably a weight/bias pair per layer; confirm against
    # create_network.
    expected_variable_count = 2 * (len(layer_sizes) + 1)
    self.assertEqual(len(net_variables), expected_variable_count)
def test_load_variables_into_network_of_wrong_size_gives_friendly_exception(
        self):
    """Loading saved variables into a differently sized network raises ValueError."""
    checkpoint_path = 'test.p'
    try:
        input_size = 20
        # Two networks that differ only in the width of their hidden layer.
        _, _, saved_variables = create_network(input_size, (30, ))
        _, _, mismatched_variables = create_network(input_size, (40, ))

        with tf.Session() as session:
            session.run(tf.global_variables_initializer())
            save_network(session, saved_variables, checkpoint_path)

            self.assertRaises(ValueError, load_network, session,
                              mismatched_variables, checkpoint_path)
    finally:
        # Best-effort cleanup: the file is absent if the test failed before
        # anything was saved.
        try:
            os.remove(checkpoint_path)
        except OSError:
            pass
def test_save_and_load_network(self):
    """Variables saved from one network load into an identical one unchanged."""
    checkpoint_path = 'test.p'
    try:
        input_size = 20
        layer_sizes = (50, 40, 30)
        _, _, source_variables = create_network(input_size, layer_sizes)
        _, _, target_variables = create_network(input_size, layer_sizes)

        with tf.Session() as session:
            session.run(tf.global_variables_initializer())

            save_network(session, source_variables, checkpoint_path)
            load_network(session, target_variables, checkpoint_path)

            # After loading, every variable pair must hold (almost) equal values.
            for saved, restored in zip(source_variables, target_variables):
                saved_value = session.run(saved)
                restored_value = session.run(restored)
                np.testing.assert_array_almost_equal(saved_value,
                                                     restored_value)
    finally:
        # Best-effort cleanup: the file is absent if the test failed before
        # anything was saved.
        try:
            os.remove(checkpoint_path)
        except OSError:
            pass
def load_network_player(network_filename, hidden_layers):
    """Build a move function backed by a network loaded from disk.

    A network is created with ``network_helpers.create_network`` using
    ``game_spec.board_squares()`` as the input size and ``hidden_layers`` as
    the hidden layer description, then its variables are restored from
    ``network_filename`` via ``network_helpers.load_network``.

    Args:
        network_filename: path passed to ``network_helpers.load_network``.
        hidden_layers: hidden layer description forwarded to
            ``network_helpers.create_network``.

    Returns:
        A function ``network_player(board_state, side)`` that prints the
        current position and the chosen move, and returns that move as
        produced by ``game_spec.flat_move_to_tuple``.
    """
    # The session is intentionally never closed here: the returned closure
    # keeps using it for every move it is asked to make.
    session = tf.Session()
    input_layer, output_layer, variables = network_helpers.create_network(
        game_spec.board_squares(), hidden_layers)
    network_helpers.load_network(session, variables, network_filename)

    def network_player(board_state, side):
        # Single-argument print(...) calls are used so the output is the same
        # on Python 2 and Python 3 (the old print statements do not compile
        # on Python 3).
        print("")
        print("Network player (%s)" % side)
        tic_tac_toe.print_game_state(board_state)

        move_probs = network_helpers.get_stochastic_network_move(
            session, input_layer, output_layer, board_state, side, log=True)
        move = game_spec.flat_move_to_tuple(move_probs.argmax())

        print("Network move: %s" % (move,))
        return move

    return network_player
def predict_best_move_low_level(game_spec, create_network, network_file_path,
                                player, board_state):
    """Make a prediction for the next move at a given state using some lower level parameters

    Args:
        game_spec (games.base_game_spec.BaseGameSpec): The game we are
            playing. Kept for interface compatibility; this function does not
            read it.
        create_network (->(input_layer : tf.placeholder, output_layer : tf.placeholder, variables : [tf.Variable])):
            Method that creates the network used for the prediction.
        network_file_path (str): path to the file with weights we want to
            load for this network
        player: The player to make the move 1 or -1
        board_state: The state of the board at some time during the game

    Returns:
        The value returned by ``get_stochastic_network_move`` for the loaded
        network (described by the original author as a vector of zeros with a
        1 on the position of the move to be taken), or ``None`` when
        ``network_file_path`` is empty or does not point to an existing file.
    """
    # Only the forward pass is needed for prediction, so no training ops
    # (policy gradient, reward/move placeholders) are built here.
    input_layer, output_layer, variables = create_network()

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())

        if not (network_file_path and os.path.isfile(network_file_path)):
            print("File with trained network can't be loaded. Exiting...'")
            return None

        print("Loading trained network from ", network_file_path)
        load_network(session, variables, network_file_path)

        return get_stochastic_network_move(session, input_layer, output_layer,
                                           board_state, player)
def train_value_network(game_spec, hidden_nodes_reinforcement,
                        reinforcement_network_file_path, hidden_nodes_value,
                        value_network_file_path, learn_rate=1e-4,
                        batch_size=100, train_samples=10000,
                        test_samples=8000):
    """Train a value network on games played out by a reinforcement network.

    Random board positions are generated and each one is played to the end
    with the reinforcement network choosing the moves for both sides. The
    value network is then fitted (sum of squared errors, RMSProp) to map a
    flattened board position to the game result. Training runs epoch by epoch
    until the error on the held-out test set goes up, after which the value
    network is saved.

    Args:
        game_spec: game description; this function calls its
            ``board_squares()``, ``outputs()``, ``play_game()`` and
            ``flat_move_to_tuple()``.
        hidden_nodes_reinforcement: hidden layer description passed to
            ``create_network`` for the reinforcement network.
        reinforcement_network_file_path: file the reinforcement network
            variables are loaded from.
        hidden_nodes_value: hidden layer description passed to
            ``create_network`` for the value network.
        value_network_file_path: file the value network is saved to; if it
            already exists it is loaded first and training continues from it.
        learn_rate: learning rate given to the RMSProp optimizer.
        batch_size: number of positions per training mini batch.
        train_samples: number of distinct positions used for training.
        test_samples: number of distinct positions held out for testing.
    """
    reinforcement_input_layer, reinforcement_output_layer, reinforcement_variables = create_network(
        game_spec.board_squares(), hidden_nodes_reinforcement,
        game_spec.outputs())

    value_input_layer, value_output_layer, value_variables = create_network(
        game_spec.board_squares(), hidden_nodes_value, output_nodes=1,
        output_softmax=False)

    target_placeholder = tf.compat.v1.placeholder("float", (None, 1))
    error = tf.reduce_sum(
        input_tensor=tf.square(target_placeholder - value_output_layer))
    train_step = tf.compat.v1.train.RMSPropOptimizer(learn_rate).minimize(
        error)

    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())

        load_network(session, reinforcement_variables,
                     reinforcement_network_file_path)

        if os.path.isfile(value_network_file_path):
            print("loading previous version of value network")
            load_network(session, value_variables, value_network_file_path)

        def make_move(board_state, side):
            move = get_deterministic_network_move(
                session, reinforcement_input_layer,
                reinforcement_output_layer, board_state, side)
            return game_spec.flat_move_to_tuple(np.argmax(move))

        # Maps a flattened board position to the result of the game played
        # from it; keying by position keeps every sample distinct.
        results_by_board = {}
        samples_needed = train_samples + test_samples

        while len(results_by_board) < samples_needed:
            # NOTE(review): the upper bound of the range is a float;
            # _generate_random_board_position is not visible here, so confirm
            # it accepts a non-integer bound.
            board_state = _generate_random_board_position(
                game_spec, (1, game_spec.board_squares() * 0.8))
            board_state_flat = tuple(np.ravel(board_state))

            # only accept the board_state if not already in the dict
            if board_state_flat not in results_by_board:
                result = game_spec.play_game(make_move, make_move,
                                             board_state=board_state)
                results_by_board[board_state_flat] = float(result)

        # Move a random selection of distinct positions into the test set.
        # The keys are copied into a list first because dict views cannot be
        # sampled from on Python 3.
        board_states_test = []
        for sample in random.sample(list(results_by_board), test_samples):
            board_states_test.append((sample, results_by_board.pop(sample)))

        board_states_training = list(results_by_board.items())

        # The test set never changes, so its feed dict is built only once.
        test_feed = {
            value_input_layer: [x[0] for x in board_states_test],
            target_placeholder: [[x[1]] for x in board_states_test],
        }
        test_error = session.run(error, feed_dict=test_feed)

        episode_number = 0
        while True:
            np.random.shuffle(board_states_training)
            train_error = 0

            # Only full mini batches are used; a trailing partial batch is
            # skipped for this epoch.
            for start_index in range(
                    0, len(board_states_training) - batch_size + 1,
                    batch_size):
                mini_batch = board_states_training[start_index:start_index +
                                                   batch_size]

                batch_error, _ = session.run(
                    [error, train_step],
                    feed_dict={
                        value_input_layer: [x[0] for x in mini_batch],
                        target_placeholder: [[x[1]] for x in mini_batch]
                    })
                train_error += batch_error

            new_test_error = session.run(error, feed_dict=test_feed)

            # Report the test error measured after this epoch, not the one
            # from the previous epoch.
            print("episode: %s train_error: %s test_error: %s" %
                  (episode_number, train_error, new_test_error))

            if new_test_error > test_error:
                print("test error went up, stopping training")
                break

            test_error = new_test_error
            episode_number += 1

        save_network(session, value_variables, value_network_file_path)
number_moves = random.randint(*NUMBER_RANDOM_RANGE) side = 1 for _ in range(number_moves): board_state = game_spec.apply_move( board_state, random.choice(list(game_spec.available_moves(board_state))), side) if game_spec.has_winner(board_state) != 0: # start again if we hit an already winning position continue side = -side return board_state reinforcement_input_layer, reinforcement_output_layer, reinforcement_variables = create_network( game_spec.board_squares(), HIDDEN_NODES_REINFORCEMENT, game_spec.outputs()) value_input_layer, value_output_layer, value_variables = create_network( game_spec.board_squares(), HIDDEN_NODES_VALUE, output_nodes=1, output_softmax=False) target_placeholder = tf.placeholder("float", (None, 1)) error = tf.reduce_sum(tf.square(target_placeholder - value_output_layer)) train_step = tf.train.RMSPropOptimizer(LEARN_RATE).minimize(error) with tf.Session() as session: session.run(tf.initialize_all_variables())
save_network_file_path = config['save_network_file_path'] number_of_historic_networks = 1 historic_network_base_path = config['historic_network_base_path'] number_of_games = config['number_of_games'] update_opponent_winrate = config['update_opponent_winrate'] print_results_every = config['print_results_every'] learn_rate = config['learn_rate'] batch_size = config['batch_size'] cnn_on = config['cnn_on'] eps = config['eps'] deterministic = config['deterministic'] mcts = config['mcts'] min_win_ticks = config['min_win_ticks'] beta = config['beta'] input_layer, output_layer, variables, weights = create_network() current_historical_index = 0 historical_networks = [] f = [] for (dirpath, dirnames, filenames) in os.walk(netloc): f.extend(filenames) break netlist_hist = [] raw_netlist = [] for file in f: p = re.compile('net_ep\d+_.+\.p') if 'config' in file: pass