Example #1
    def __init__(self,
                 game,
                 checkpoint_directory,
                 actor=None,
                 network_save_interval=100,
                 rollouts=100,
                 start_game=0,
                 replay_save_interval=250,
                 replay_limit=20000,
                 minibatch_size=50,
                 replay_file=None,
                 test_games=50,
                 nn_steps=1):
        self.game = game
        self.checkpoint_directory = checkpoint_directory
        self.network_save_interval = network_save_interval
        self.mcts = MCTS(game,
                         simulations=rollouts,
                         default_policy=self.create_default_policy())
        self.game_count = start_game
        self.replay_save_interval = replay_save_interval
        self.replay_buffer = deque(maxlen=replay_limit)
        self.rp_count = 0
        self.minibatch_size = minibatch_size
        self.test_games = test_games
        self.nn_steps = nn_steps

        if replay_file == 'auto':
            self.replay_file = f'{checkpoint_directory}/replays.txt'
        else:
            self.replay_file = replay_file

        if not os.path.exists(checkpoint_directory):
            os.makedirs(checkpoint_directory)

        if actor:
            self.actor = actor
            self.save_actor_to_file()
        else:
            self.actor = self.load_actor_from_file()
            if start_game > 0:
                self.actor.load_checkpoint(
                    f'{checkpoint_directory}/game_{start_game}')

        if replay_save_interval > replay_limit:
            raise ValueError(
                f'replay_save_interval ({replay_save_interval}) must be smaller '
                f'than replay_limit ({replay_limit})')

        if replay_file is not None and replay_file != 'auto':
            try:
                self.load_replays()
            except FileNotFoundError:
                pass

        if start_game == 0:
            self.actor.save_checkpoint(checkpoint_directory + '/game_0')
            self.actor.save_checkpoint(checkpoint_directory + '/best')
            with open(checkpoint_directory + '/best.txt', 'w') as f:
                f.write(str(0))
Example #2
class MCTSTrainer:
    def __init__(self, gnn, test_graphs, filename):
        self.mcts = MCTS(gnn)
        self.test_graphs = test_graphs
        self.test_result = []
        self.filename = filename

    # rollout until the end
    def train1(self, graph, TAU, batch_size=10, iter_p=2):
        self.mcts.train(graph, TAU, batch_size=batch_size, iter_p=iter_p)

    # rollout only until leaf
    def train2(self, graph, TAU, batch_size=10, iter_p=2):
        self.mcts.train(graph,
                        TAU,
                        batch_size=batch_size,
                        stop_at_leaf=True,
                        iter_p=iter_p)

    def test(self):
        result = [self.mcts.search(graph) for graph in self.test_graphs]
        print(result)
        self.test_result.append(result)

    def save_test_result(self):
        os.makedirs("log", exist_ok=True)
        with open("log/{}.pickle".format(self.filename), mode="wb") as f:
            pickle.dump(self.test_result, f)

    def save_model(self):
        os.makedirs("model", exist_ok=True)
        torch.save(self.mcts.gnn.state_dict(),
                   "model/{}.pth".format(self.filename))
Example #3
class AI(Player):
    def __init__(self, name, m):
        Player.__init__(self, name)
        self.brain = MCTS(m)

    def request_input(self, game: Game, state: State):
        self.brain.simulate(game, state)
        best_idx = self.brain.get_best_child(state)
        return game.gen_child_states(state)[best_idx]
Example #4
    def __init__(self, ip_address=None, verbose=True, auto_test=False):
        self.series_id = -1
        self.starting_player = -1
        self.game_count = 0
        self.series_count = 0
        self.series_game_count = 0
        BasicClientActorAbs.__init__(self,
                                     ip_address,
                                     verbose=verbose,
                                     auto_test=auto_test)

        trainer = ActorTrainer(self.hex,
                               'model/1000x500x100-200',
                               start_game=250)
        #self.actor = trainer.actor
        self.actor = MCTS(self.hex, simulations=100)
Example #5
def use_model(t):
    gnn, name, graph = t
    np.random.seed()

    mcts = MCTS(gnn, performance=True)

    Timer.start('all')

    result = mcts.search_for_exp(graph, time_limit=10 * 60, min_iter_num=100)
    print("graph: {}, result: {}".format(name, result))
    print("max: ", max(result))

    Timer.end('all')
    Timer.print()

    return max(result)
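Because use_model unpacks a single tuple argument and reseeds NumPy at the start, it appears designed to be mapped over worker processes; a hedged sketch of that usage (the gnn object and the list of named graphs are placeholders):

from multiprocessing import Pool

tasks = [(gnn, name, graph) for name, graph in named_graphs]  # hypothetical inputs
with Pool() as pool:
    best_results = pool.map(use_model, tasks)  # one max result per graph
print(best_results)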
Example #6
def choose_mcts(next_move_types, next_moves, last_move_type, last_move, game,
                action_mcts, simulation):

    # Real play (not an internal MCTS simulation): run MCTS to choose an action
    if not simulation:

        # Cannot beat the last move ("yaobuqi"), so MCTS is not needed
        if len(next_moves) == 0:
            print("actions", [430])
            return "yaobuqi", []

        game_copy = copy.deepcopy(game)

        game_copy.players[0].model = "mcts"
        game_copy.players[1].model = "random"
        game_copy.players[2].model = "random"

        mcts = MCTS(tree_policy=UCB1(c=1.41),
                    default_policy=random_terminal_roll_out,
                    backup=monte_carlo,
                    game=game_copy)

        #state
        s = get_state(game_copy.playrecords, player=1)
        #action
        actions = get_actions(next_moves, game_copy.actions_lookuptable,
                              game_copy)
        #new state
        s = combine(s, actions)

        begin = time.time()
        best_action, win_prob = mcts(s, n=1000)

        game.playrecords.win_prob = round(win_prob, 2)

        duration = time.time() - begin
        print("actions", actions, "best_action", best_action, "win_prob",
              win_prob, "time", duration)

        if best_action == 429:
            return "buyao", []
        elif best_action == 430:
            return "yaobuqi", []
        else:
            best_action_id = actions.index(best_action)
            return next_move_types[best_action_id], next_moves[best_action_id]
    # Inside an MCTS simulation: the chosen action is passed in as action_mcts
    else:
        if action_mcts == 429:
            return "buyao", []
        elif action_mcts == 430:
            return "yaobuqi", []
        else:
            return next_move_types[action_mcts], next_moves[action_mcts]
Example #7
def uct_play_game():
    game = NimGame(15)
    search_mgr = SearchMgr()
    p1, p2 = MCTS(search_mgr).set_root(game), MCTS(search_mgr).set_root(game)
    while game.get_actions():
        print(str(game))
        a1 = p1.uct(game, iters=100)
        a2 = p2.uct(game, iters=1000)
        if game.player_just_moved == 1:
            # play with values for iter_max and verbose = True
            # Player 2
            a = a2
        else:
            # Player 1
            a = a1
        print('Best Action: ' + str(a) + '\n')
        game.take_action(a)
        p1.update_root(a)
        p2.update_root(a)
    if game.get_result(game.player_just_moved) == 1.0:
        print('Player ' + str(game.player_just_moved) + ' wins!')
    elif game.get_result(game.player_just_moved) == 0.0:
        print('Player ' + str(3 - game.player_just_moved) + ' wins!')
    else:
        print('Nobody wins!')
Example #8
def play_series(x):
    game = Hex()
    actor = Actor(game, [],
                  replay_file='model/replays_expert.txt',
                  rp_save_interval=replay_save_interval)
    mcts = MCTS(game, simulations=rollouts)

    for i in range(games_per_series):
        print(f'Starting game {i + 1}')
        state = game.get_initial_state()
        mcts.set_state(state)
        while not game.is_finished(state):
            move, probabilities = mcts.select_move(True)
            padded_probs = np.pad(
                probabilities,
                (0, game.num_possible_moves() - len(probabilities)), 'constant')
            actor.add_to_replay_buffer(state, padded_probs)
            state = game.get_outcome_state(state, move)
            mcts.set_state(state)
Example #9
params["n_input_features"] = numpy.prod(env.observation_space.shape)
params["env"] = env
params["gamma"] = 0.99

# Planning/MCTS Hyperparameters
params["horizon"] = 10
params["simulations"] = 100  # 1000

# Deep RL Hyperparameters
params["alpha"] = 0.0005  # 0.001
params["epsilon"] = 0.1
params["memory_capacity"] = 10000
params["warmup_phase"] = 1000
params["target_update_interval"] = 5000
params["minibatch_size"] = 64
params["epsilon_linear_decay"] = 1.0 / params["memory_capacity"]
params["epsilon_min"] = 0.01
training_episodes = 1  # 2000

mcts_agent = MCTS(params["env"],
                  params["gamma"],
                  c=1.,
                  n_iter=params["simulations"])
a2c_agent = a2c.A2CLearner(params)
lens = [len(episode(env, mcts_agent, a2c_agent, i)) for i in range(500)]
actions = episode(env, mcts_agent, a2c_agent, 500)
print('-')
print(actions)
plot.plot(lens)
plot.show()
Example #10
pos = new_chess_game()

for i in range(len(move)):
    movei = move[i]
    if pos[movei[0],movei[1]] != 0:
        pos[movei[2],movei[3]] = pos[movei[0],movei[1]]
        pos[movei[0],movei[1]] = 0
    else:
        raise ValueError("invalid move: no piece at source square")

print(np.flipud(pos))



mcts = MCTS(tree_policy=Go(c=5), 
            default_policy=RandomKStepRollOut_Value(20, 0.95),
            backup=monte_carlo)

policy_fun = policy_nn()
rollout_fun = rollout_nn()
value_fun = value_nn() 

root = StateNode(None, ChessState(pos, 1, policy_fun, rollout_fun, value_fun, False ))
best_action = mcts(root, n=500)

pr.disable()
s = io.StringIO()
sortby = 'cumulative'
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())
Example #11
from games.hex import Hex
from games.random_player import RandomPlayer
from mcts.mcts import MCTS

game = Hex()
p1 = MCTS(game, simulations=50)
p2 = RandomPlayer(game)
players = (p1, p2)
games = 20

p2_starting = False
wins = 0
for i in range(games):
    state = game.get_initial_state()
    turn = p2_starting

    while not game.is_finished(state):
        for p in players:
            p.set_state(state)
        move = players[int(turn)].select_move()
        state = game.get_outcome_state(state, move)
        turn = not turn

    result = game.evaluate_state(state)
    if (p2_starting and result == -1) or (not p2_starting and result == 1):
        wins += 1
        print(f'Won game {i+1}')
    else:
        print(f'Lost game {i+1}')

    p2_starting = not p2_starting
Example #12
class ActorTrainer:
    def __init__(self,
                 game,
                 checkpoint_directory,
                 actor=None,
                 network_save_interval=100,
                 rollouts=100,
                 start_game=0,
                 replay_save_interval=250,
                 replay_limit=20000,
                 minibatch_size=50,
                 replay_file=None,
                 test_games=50,
                 nn_steps=1):
        self.game = game
        self.checkpoint_directory = checkpoint_directory
        self.network_save_interval = network_save_interval
        self.mcts = MCTS(game,
                         simulations=rollouts,
                         default_policy=self.create_default_policy())
        self.game_count = start_game
        self.replay_save_interval = replay_save_interval
        self.replay_buffer = deque(maxlen=replay_limit)
        self.rp_count = 0
        self.minibatch_size = minibatch_size
        self.test_games = test_games
        self.nn_steps = nn_steps

        if replay_file == 'auto':
            self.replay_file = f'{checkpoint_directory}/replays.txt'
        else:
            self.replay_file = replay_file

        if not os.path.exists(checkpoint_directory):
            os.makedirs(checkpoint_directory)

        if actor:
            self.actor = actor
            self.save_actor_to_file()
        else:
            self.actor = self.load_actor_from_file()
            if start_game > 0:
                self.actor.load_checkpoint(
                    f'{checkpoint_directory}/game_{start_game}')

        if replay_save_interval > replay_limit:
            raise ValueError(
                f'replay_save_interval ({replay_save_interval}) must be smaller '
                f'than replay_limit ({replay_limit})')

        if replay_file is not None and replay_file != 'auto':
            try:
                self.load_replays()
            except FileNotFoundError:
                pass

        if start_game == 0:
            self.actor.save_checkpoint(checkpoint_directory + '/game_0')
            self.actor.save_checkpoint(checkpoint_directory + '/best')
            with open(checkpoint_directory + '/best.txt', 'w') as f:
                f.write(str(0))

    def train(self, num_games):
        for i in range(num_games):
            self.game_count += 1
            game_start_time = time.time()
            print(f'[GAME {self.game_count}] Initializing state')
            state = self.game.get_initial_state()
            self.mcts.set_state(state)

            print(f'[GAME {self.game_count}] Simulating game')
            while not self.game.is_finished(state):
                move, probabilities = self.mcts.select_move(True)
                padded_probs = np.pad(
                    probabilities,
                    (0, self.game.num_possible_moves() - len(probabilities)),
                    'constant')
                self.add_to_replay_buffer(state, padded_probs)
                state = self.game.get_outcome_state(state, move)
                self.mcts.set_state(state)

            print(f'[GAME {self.game_count}] Training neural network')
            for j in range(self.nn_steps):
                self.train_network()

            if self.game_count % self.network_save_interval == 0:
                print(
                    f'[GAME {self.game_count}] Saving neural network checkpoint'
                )
                self.actor.save_checkpoint(
                    f'{self.checkpoint_directory}/game_{self.game_count}')
                if self.test_against_best():
                    print(
                        f'[GAME {self.game_count}] New best found - saving checkpoint'
                    )
            print(
                f'[GAME {self.game_count}] Time elapsed: {time.time() - game_start_time:.2f}'
            )
            print()

    def test_against_best(self):
        if self.test_games <= 0:
            return False
        print(f'[GAME {self.game_count}] Testing against best model...',
              end='')
        best_actor = self.load_actor_from_file()
        best_actor.load_checkpoint(f'{self.checkpoint_directory}/best')

        starting = True
        wins = 0
        for i in range(self.test_games):
            turn = starting
            state = self.game.get_initial_state()
            while not self.game.is_finished(state):
                if turn:
                    move = self.actor.select_move(state)
                else:
                    move = best_actor.select_move(state)
                state = self.game.get_outcome_state(state, move[0])
                turn = not turn

            result = self.game.evaluate_state(state)
            if (result == 1 and starting) or (result == -1 and not starting):
                wins += 1
            starting = not starting

        print(f'won {wins}/{self.test_games}')
        if wins > self.test_games / 2:
            self.actor.save_checkpoint(self.checkpoint_directory + '/best')
            with open(self.checkpoint_directory + '/best.txt', 'w') as f:
                f.write(str(self.game_count))
            return True
        return False

    def train_network(self):
        minibatch = random.sample(
            self.replay_buffer,
            min(self.minibatch_size, len(self.replay_buffer)))
        for i in range(len(minibatch)):
            minibatch[i] = self.game.format_for_nn(
                minibatch[i][0], format=self.actor.format), minibatch[i][1]
        self.actor.network.train(minibatch=minibatch)

    def create_default_policy(self):
        def actor_default_policy(state, moves):
            move = self.actor.select_move(state, stochastic=True)
            return move

        return actor_default_policy

    def add_to_replay_buffer(self, state, probabilities):
        self.replay_buffer.append((state, probabilities))
        self.rp_count += 1

        if self.replay_save_interval != -1 and self.rp_count % self.replay_save_interval == 0 and self.rp_count != 0:
            replays = len(self.replay_buffer)
            self.save_replays(
                itertools.islice(self.replay_buffer,
                                 replays - self.replay_save_interval, replays))

    def save_replays(self, replays):
        if self.replay_file is None:
            return

        with open(self.replay_file, 'a') as f:
            for replay in replays:
                state_string = ','.join(map(str, replay[0][0])) + ',' + str(
                    replay[0][1])
                probs_string = ','.join(map(str, replay[1]))
                rp_string = state_string + ';' + probs_string
                f.write(rp_string + '\n')

    def load_replays(self):
        with open(self.replay_file, 'r') as f:
            for line in f:
                state, probs = line.split(';')
                state = list(map(int, state.split(',')))
                player = state[-1]
                board = state[:-1]
                probs = list(map(float, probs.split(',')))
                self.replay_buffer.append(((board, player), probs))

    def load_actor_from_file(self):
        with open(f'{self.checkpoint_directory}/actor_params.txt') as f:
            lines = f.read().split('\n')
            format = lines[0]
            optimizer = 'adam'
            if len(lines) > 1:
                optimizer = lines[1]

        with open(f'{self.checkpoint_directory}/actor_layers.bin', 'rb') as f:
            layers = pickle.load(f)

        return Actor(self.game, layers, format=format, optimizer=optimizer)

    def save_actor_to_file(self):
        with open(f'{self.checkpoint_directory}/actor_params.txt', 'w') as f:
            f.write(self.actor.format + '\n')
            f.write(self.actor.optimizer)

        with open(f'{self.checkpoint_directory}/actor_layers.bin', 'wb') as f:
            pickle.dump(self.actor.layers, f)
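A minimal usage sketch of ActorTrainer, combining this class with the constructor call seen in Example #4; the Hex game, the checkpoint path and the number of self-play games are illustrative assumptions, and the checkpoint directory is assumed to already hold the saved actor parameter files that load_actor_from_file reads:

from games.hex import Hex

game = Hex()
trainer = ActorTrainer(game,
                       'model/1000x500x100-200',  # checkpoint directory (illustrative)
                       rollouts=100,
                       replay_file='auto')
trainer.train(500)  # self-play, network updates and periodic checkpointing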
Example #13
 def __init__(self, name, m):
     Player.__init__(self, name)
     self.brain = MCTS(m)
Example #14
from mcts.mcts import MCTS
from games.nim import Nim
from games.random_player import RandomPlayer
import random

# Instantiate our game with given parameters
game = Nim(9, 3)
num_games = 50
play_mode = 0

# Create a new MCTS player for player 1
player1 = MCTS(game, simulations=1000)
# Create player 2 - either as the same player as player 1, or as a random player
player2 = (player1, RandomPlayer(game))[play_mode]
players = [player1, player2]


def run_single_game(starting_player=0, verbose=False):
    """
    Runs a simulation of a single game, and returns the winning player.
    :param starting_player: The player that should start the game.
    :param verbose: If True, string representations of all moves will be printed to the console.
    :return: 0 if player 1 is the winner, 1 if player 2 is the winner.
    """
    state = game.get_initial_state(starting_player)
    current_player = starting_player
    for p in players:
        p.set_state(state)

    while not game.is_finished(state):
        move = players[current_player].select_move()
Example #15
class BasicClientActor(BasicClientActorAbs):
    def __init__(self, ip_address=None, verbose=True, auto_test=False):
        self.series_id = -1
        self.starting_player = -1
        self.game_count = 0
        self.series_count = 0
        self.series_game_count = 0
        BasicClientActorAbs.__init__(self,
                                     ip_address,
                                     verbose=verbose,
                                     auto_test=auto_test)

        trainer = ActorTrainer(self.hex,
                               'model/1000x500x100-200',
                               start_game=250)
        #self.actor = trainer.actor
        self.actor = MCTS(self.hex, simulations=100)

    def handle_get_action(self, state):
        """
        Here you will use the neural net that you trained using MCTS to select a move for your actor on the current
        board. Remember to use the correct player_number for YOUR actor! The default action is to select a random empty
        cell on the board. This should be modified.
        :param state: The current board in the form (1 or 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), where
        1 or 2 indicates the number of the current player.  If you are player 2 in the current series, for example,
        then you will see a 2 here throughout the entire series, whereas player 1 will see a 1.
        :return: Your actor's selected action as a tuple (row, column)
        """
        current_player = state[0] - 1
        board = list(state[1:])
        state = (board, current_player)
        #next_move = self.actor.select_move(state)[0][0]
        self.actor.set_state(state)
        next_move = self.actor.select_move()[0]
        return next_move

    def handle_series_start(self, unique_id, series_id, player_map, num_games,
                            game_params):
        """
        Set the player_number of our actor, so that we can tell our MCTS which actor we are.
        :param unique_id: integer identifier for the player within the whole tournament database
        :param series_id: (1 or 2) indicating which player this will be for the ENTIRE series
        :param player_map: a list of tuples: (unique-id series-id) for all players in a series
        :param num_games: number of games to be played in the series
        :param game_params: important game parameters.  For Hex = list with one item = board size (e.g. 5)
        :return

        """
        self.series_id = series_id
        self.series_count += 1
        print(f'Series {self.series_count} starting')
        print(f'Series ID: {series_id}')
        self.series_game_count = 0
        #############################
        #
        #
        # YOUR CODE (if you have anything else) HERE
        #
        #
        ##############################

    def handle_game_start(self, start_player):
        """
        :param start_player: The starting player number (1 or 2) for this particular game.
        :return
        """
        self.starting_player = start_player
        self.game_count += 1
        print(
            f'Game {self.game_count} starting. (Game {self.series_game_count} in series.)'
        )
        #############################
        #
        #
        # YOUR CODE (if you have anything else) HERE
        #
        #
        ##############################

    def handle_game_over(self, winner, end_state):
        """
        Here you can decide how to handle what happens when a game finishes. The default action is to print the winner
        and the end state.
        :param winner: Winner ID (1 or 2)
        :param end_state: Final state of the board.
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        ##############################
        print()
        print("Game over, these are the stats:")
        print('Winner: ' + str(winner))
        print('End state:')
        self.print_state(end_state)

    def handle_series_over(self, stats):
        """
        Here you can handle the series end in any way you want; the initial handling just prints the stats.
        :param stats: The actor statistics for a series = list of tuples [(unique_id, series_id, wins, losses)...]
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("Series ended, these are the stats:")
        print(f'Series ID: {self.series_id}')
        for stat in stats:
            if stat[1] == self.series_id:
                # Found my stats
                print(
                    f'Won {stat[2]}/{stat[2] + stat[3]} ({stat[2]/(stat[2]+stat[3]):.0%})'
                )
        print()
        # print(str(stats))

    def handle_tournament_over(self, score):
        """
        Here you can decide to do something when a tournament ends. The default action is to print the received score.
        :param score: The actor score for the tournament
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("Tournament over. Your score was: " + str(score))

    def handle_illegal_action(self, state, illegal_action):
        """
        Here you can handle what happens if you get an illegal action message. The default is to print the state and the
        illegal action.
        :param state: The state
        :param illegal_action: The illegal action
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("An illegal action was attempted:")
        print('State: ' + str(state))
        print('Action: ' + str(illegal_action))
Example #16
 def __init__(self, gnn, test_graphs, filename):
     self.mcts = MCTS(gnn)
     self.test_graphs = test_graphs
     self.test_result = []
     self.filename = filename
Example #17
testfile = "trivialExample.in"
problem = FileReader(testfile)
print("Video Sizes: %r" % (problem.videoSizes,))
print("Endpoints:\n\t%s" % ("\n\t".join([str(e) for e in problem.endpoints])))
print("Requests: %r" % ([r for r in problem.requests]))

# Generate initial state
initial_contents = list([(0, []) for _ in range(problem.nCaches)])
initial_score = 0
initial_state = TreeState(caches_contents=initial_contents, score=initial_score,
                          problem=problem)

# Generate the optimal end state
mcts = MCTS(tree_policy=UCB1(c=1.41),
            default_policy=immediate_reward,
            backup=monte_carlo)

node = StateNode(parent=None, state=initial_state)

while True:
    if node.state.is_terminal():
        print("Terminal node reached.")
        break
    print("Finding best action")
    best_action = mcts(node)
    print("Performing action")
    node = StateNode(parent=None, state=node.state.perform(best_action))
    print("Score now is: %d" % node.state.score)

    print("Saving output")
Example #18
def mcts_agent(game: ".game.Game", move_number) -> ".game.Game":
    mcts = MCTS(game, move_number)
    cards_to_choose, cards_attack = mcts.choose_next_move()
    choose_card_from_hand(game, ChooseCard.DEFINED_CARDS, cards_to_choose)
    attack_opponent(game, AttackOpponent.DEFINED_CARDS, cards_attack)
Example #19
    timeout = int(args.timeout) - 5

    c1 = Client(args.ip, PORTS[player_arg], player_arg)
    c1.send_name(PLAYER_NAMES[player_arg])

    board = Board()
    game = Game(board)
    # Main game loop
    try:
        while not game.ended:
            state = None
            while state is None:
                state, turn = c1.receive_state()
                game.board.board = state
                if turn not in ["black", "white"]:
                    raise GameEndedException
                game.turn = TURN_MAPPING[turn]
                print(state, turn)
            if game.turn == OUR_PLAYER:
                mcts = MCTS(deepcopy(game),
                            OUR_PLAYER,
                            max_depth=max_depth,
                            C=C)
                start, end = mcts.search(timeout)
                print(start, end)
                c1.send_move(start, end)
    except GameEndedException:
        print("Game ended with state {}".format(turn))

    c1.close()