Code example #1
    def __init__(self, IP_address=None, verbose=True):
        self.series_id = -1
        self.size = 6

        self.game = Hex(6)
        self.model = ANET(0.9, (10, 15, 20), 'linear', 'sgd', self.size)
        self.model.load_model(200)

        BasicClientActorAbs.__init__(self, IP_address, verbose=verbose)
Code example #2
    def __init__(self) -> None:
        self.__actual_game = SimulatedWorldFactory.get_simulated_world()
        self.__replay_buffer = np.empty((0, parameters.STATE_SIZE + parameters.NUMBER_OF_ACTIONS))  # RBUF
        self.__ANET = ANET()

        self.__episodes = parameters.EPISODES
        self.__min_number_of_rollouts = parameters.MIN_NUMBER_OF_ROLLOUTS
        self.__simulation_time_out = parameters.SIMULATION_TIME_OUT
        self.__caching_interval = self.__episodes // (parameters.ANETS_TO_BE_CACHED - 1)
        self.__batch_size = parameters.ANET_BATCH_SIZE
        self.__replay_buffer_size = parameters.REPLAY_BUFFER_SIZE
        self.__buffer_insertion_index = 0
Code example #3
    def __init__(self, game, state, anet_config, size, save_interval):
        """Init MCTS-object

        :param game:
        :param state:
        """
        self.game_manager = game
        self.root_node = Node(state, None)
        self.ANET = ANET(anet_config[0], anet_config[1], anet_config[2], anet_config[3], size)
        self.RBUF = []
        self.save_interval = save_interval
        self.time = np.zeros(5)
        self.size = size
Code example #4
    def load_agents(self, series_name, episodes, num_agents):
        """
        Load all pre-trained actor neural networks into a list of agents
        :return: list[ANET]
        """
        players = []
        for num_games_trained in range(0, episodes + 1,
                                       int(episodes / (num_agents - 1))):

            anet = ANET(input_size=self.board_size,
                        hidden_layers=self.hidden_layers)
            anet.load_anet(series_name, self.board_size, num_games_trained)
            anet.model.eval()
            players.append(anet)
        return players
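Note: the loop stride int(episodes / (num_agents - 1)) spaces the loaded checkpoints evenly from the untrained network (0 games) up to the fully trained one. A minimal, standalone sketch of the levels it visits (the numbers below are illustrative, not taken from the project):

# Illustrative only: which checkpoint levels load_agents would visit.
episodes = 200      # assumed value for illustration
num_agents = 5      # assumed value for illustration

levels = list(range(0, episodes + 1, int(episodes / (num_agents - 1))))
print(levels)       # [0, 50, 100, 150, 200] -> one agent per cached training stage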
Code example #5
File: HexTrainer.py  Project: alfredronning/HEX2
def main():
    size = 5

    startState = HexState(player = 1, hexSize = size)

    anet = ANET(
        layer_dims = [size*size*2+2, size*size, size*size],
        case_manager = CaseManager([]),
        learning_rate=0.001,
        display_interval=None,
        minibatch_size=10,
        validation_interval=None,
        softmax=True,
        error_function="ce",
        hidden_activation_function="relu",
        optimizer="adam",
        w_range=[0.0, 0.1],
        grabvars_indexes=[],
        grabvars_types=[],
        lr_freq = None, bs_freq = None, early_stopping=False, target_accuracy=None
        )

    trainer = HexTrainer(startState = startState,
        anet = anet,
        numberOfGames = 2,
        numberOfSimulations = 100,
        batchSize = 64,
        verbose = False,
        savedGames = 5,
        saveFolder = "netsaver/topp5random/")

    trainer.run()
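The first entry of layer_dims, size*size*2+2, is consistent with a one-hot board encoding: one plane per player's stones plus two bits for the player to move. That encoding is an assumption here (the snippet does not show it); a standalone sketch:

import numpy as np

def encode_state(board, player, size):
    # Hypothetical encoding producing size*size*2 + 2 features:
    # one-hot planes for each player's stones plus a two-bit player-to-move flag.
    cells = np.asarray(board).reshape(size * size)
    p1_plane = (cells == 1).astype(np.float64)   # cells owned by player 1
    p2_plane = (cells == 2).astype(np.float64)   # cells owned by player 2
    to_move = np.array([1.0, 0.0]) if player == 1 else np.array([0.0, 1.0])
    return np.concatenate([p1_plane, p2_plane, to_move])

# Empty 5x5 board, player 1 to move -> 52 features (5*5*2 + 2), matching layer_dims[0]
print(encode_state(np.zeros((5, 5)), player=1, size=5).shape)  # (52,)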
Code example #6
    def handle_series_start(self, unique_id, series_id, player_map, num_games, game_params):
        """
        Set the player_number of our actor, so that we can tell our MCTS which actor we are.
        :param unique_id - integer identifier for the player within the whole tournament database
        :param series_id - (1 or 2) indicating which player this will be for the ENTIRE series
        :param player_map - a list of tuples: (unique-id series-id) for all players in a series
        :param num_games - number of games to be played in the series
        :param game_params - important game parameters.  For Hex = list with one item = board size (e.g. 5)
        :return

        """

        self.series_id = series_id
        #############################
        self.board_size = game_params[0]
        self.actor = ANET(self.board_size)
        self.actor.load_anet(self.actor_tag, self.board_size, self.actor_level)
        self.actor.model.eval()
Code example #7
def get_agents():
    _, _, models = next(walk('models'))
    models = filter(lambda name: name[0] != '.', models)
    models = sorted(models, key=lambda name: int(name.split(".")[0]))
    agents = []
    for model in models:
        agent = ANET(model)
        agents.append(agent)
    return agents
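The int(...) sort key in get_agents matters because the model files are evidently named by training level, e.g. <episodes>.<extension>; a plain lexicographic sort would interleave them. A standalone illustration with made-up file names:

names = ["100.h5", "0.h5", "50.h5", "150.h5", "200.h5"]   # hypothetical checkpoints

print(sorted(names))                                      # ['0.h5', '100.h5', '150.h5', '200.h5', '50.h5'] (lexicographic)
print(sorted(names, key=lambda n: int(n.split(".")[0])))  # ['0.h5', '50.h5', '100.h5', '150.h5', '200.h5'] (by training level)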
Code example #8
    def loadParams(self, layerDims, loadPath, globalStep):
        self.anet = ANET(
        layer_dims = layerDims,
        softmax=True,
        case_manager = CaseManager([]))

        session = TFT.gen_initialized_session(dir="probeview")
        self.anet.current_session = session
        state_vars = []
        for m in self.anet.layer_modules:
            vars = [m.getvar('wgt'), m.getvar('bias')]
            state_vars = state_vars + vars
        self.anet.state_saver = tf.train.Saver(state_vars)
        self.anet.state_saver.restore(self.anet.current_session, loadPath+"-"+str(globalStep))
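The restore path loadPath + "-" + str(globalStep) matches the file names TensorFlow 1's Saver produces when a checkpoint is written with a global step. A sketch of that save side, assuming TF1-style graphs via tf.compat.v1 (variable and path names are illustrative only, not from the project):

import os
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

w = tf.compat.v1.get_variable("wgt", shape=[4, 4])
saver = tf.compat.v1.train.Saver([w])

os.makedirs("netsaver", exist_ok=True)
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    # Saving with global_step=300 writes "netsaver/demo-300.*",
    # which is the suffix loadParams expects when restoring.
    saver.save(sess, "netsaver/demo", global_step=300)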
Code example #9
class ReinforcementLearner:
    """
    Reinforcement Learner agent using the Actor-Critic architecture

    ...

    Attributes
    ----------

    Methods
    -------
    run() -> None:
        Runs all episodes with pivotal parameters
    run_one_game(player_1: ANET, player_2: ANET, visualize=False) -> int:
        Runs exactly one game with the provided players.
    """

    def __init__(self) -> None:
        self.__actual_game = SimulatedWorldFactory.get_simulated_world()
        self.__replay_buffer = np.empty((0, parameters.STATE_SIZE + parameters.NUMBER_OF_ACTIONS))  # RBUF
        self.__ANET = ANET()

        self.__episodes = parameters.EPISODES
        self.__min_number_of_rollouts = parameters.MIN_NUMBER_OF_ROLLOUTS
        self.__simulation_time_out = parameters.SIMULATION_TIME_OUT
        self.__caching_interval = self.__episodes // (parameters.ANETS_TO_BE_CACHED - 1)
        self.__batch_size = parameters.ANET_BATCH_SIZE
        self.__replay_buffer_size = parameters.REPLAY_BUFFER_SIZE
        self.__buffer_insertion_index = 0

    def __run_one_episode(self) -> None:
        initial_game_state = self.__actual_game.reset()
        monte_carlo_tree = MCTS(initial_game_state)
        root_state = initial_game_state

        while not self.__actual_game.is_final_state():
            monte_carlo_game = SimulatedWorldFactory.get_simulated_world(root_state)

            number_of_rollouts = 0
            start_time = time()
            while time() - start_time < self.__simulation_time_out or number_of_rollouts < self.__min_number_of_rollouts:
                monte_carlo_tree.do_one_simulation(self.__ANET.choose_epsilon_greedy, monte_carlo_game)
                monte_carlo_game.reset(root_state)
                number_of_rollouts += 1
            # print(f'Rollouts: {number_of_rollouts}')

            target_distribution = monte_carlo_tree.get_normalized_distribution()
            self.__add_to_replay_buffer(root_state, target_distribution)

            action = monte_carlo_tree.root.tree_policy()
            next_state, _ = self.__actual_game.step(action)

            monte_carlo_tree.update_root(action)
            root_state = next_state

        # Train ANET on a random minibatch of cases from RBUF
        random_rows = self.__sample_replay_buffer()
        self.__ANET.fit(self.__replay_buffer[random_rows])

    def __add_to_replay_buffer(self, root_state: Tuple[int, ...], target_distribution: Tuple[float, ...]):
        training_instance = np.array([root_state + target_distribution], dtype=np.float64)
        if self.__buffer_insertion_index < self.__replay_buffer_size:
            self.__replay_buffer = np.append(self.__replay_buffer, training_instance, axis=0)  # type: ignore
        else:
            i = self.__buffer_insertion_index % self.__replay_buffer_size
            self.__replay_buffer[i] = training_instance  # type: ignore
        self.__buffer_insertion_index += 1

    def __sample_replay_buffer(self):
        number_of_rows = min(self.__buffer_insertion_index, self.__replay_buffer_size)
        batch_size = min(number_of_rows, self.__batch_size)
        return random.sample(range(0, number_of_rows), batch_size)

    def run(self) -> None:
        """
        Runs all episodes with pivotal parameters.
        Visualizes one round at the end.
        """
        self.__ANET.save('0.h5')  # Save the untrained ANET prior to episode 1
        for episode in range(1, self.__episodes + 1):
            print('\nEpisode:', episode)
            self.__run_one_episode()

            if episode % self.__caching_interval == 0:
                # Save ANET for later use in tournament play.
                self.__ANET.save(str(episode) + '.h5')

        Visualize.plot_loss(self.__ANET.loss_history)
        Visualize.plot_epsilon(self.__ANET.epsilon_history)

        if parameters.VISUALIZE_GAMES:
            print('Showing one episode with the greedy strategy.')
            ReinforcementLearner.run_one_game(self.__ANET, self.__ANET, True)

    @staticmethod
    def run_one_game(player_1: ANET, player_2: ANET, visualize: bool) -> int:
        """
        Runs exactly one game with the provided players.
        """
        world = SimulatedWorldFactory.get_simulated_world()
        current_state = world.reset()

        if visualize and parameters.GAME_TYPE == Game.Hex:
            Visualize.initialize_board(current_state)

        players = (player_1, player_2)
        i = 0
        winner = 0
        while not world.is_final_state():
            legal_actions = world.get_legal_actions()

            action = players[i].choose_greedy(current_state, legal_actions)
            current_state, winner = world.step(action)

            # Alternating players
            i = (i + 1) % 2

            if visualize and parameters.GAME_TYPE == Game.Hex:
                Visualize.draw_board(current_state, winner, str(player_1), str(player_2))

        print(f'Player {winner} won the game.')
        return winner
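Once the buffer holds REPLAY_BUFFER_SIZE rows, __add_to_replay_buffer overwrites the oldest entries in place, i.e. it behaves as a ring buffer. A standalone sketch of that behaviour with made-up sizes:

import numpy as np

capacity, row_width = 4, 3                      # made-up sizes for illustration
buffer = np.empty((0, row_width))
insertion_index = 0

for step in range(6):                           # insert 6 rows into a 4-row buffer
    row = np.full((1, row_width), step, dtype=np.float64)
    if insertion_index < capacity:
        buffer = np.append(buffer, row, axis=0)     # buffer still growing
    else:
        buffer[insertion_index % capacity] = row    # wrap around, overwrite oldest row
    insertion_index += 1

print(buffer[:, 0])   # [4. 5. 2. 3.] -> rows 0 and 1 were overwritten first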
Code example #10
class BasicClientActor(BasicClientActorAbs):

    def __init__(self, IP_address=None, verbose=True):
        self.series_id = -1
        BasicClientActorAbs.__init__(self, IP_address, verbose=verbose)

    def handle_get_action(self, state):
        """
        Here you will use the neural net that you trained using MCTS to select a move for your actor on the current board.
        Remember to use the correct player_number for YOUR actor! The default action is to select a random empty cell
        on the board. This should be modified.
        :param state: The current board in the form (1 or 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), where
        1 or 2 indicates the number of the current player.  If you are player 2 in the current series, for example,
        then you will see a 2 here throughout the entire series, whereas player 1 will see a 1.
        :return: Your actor's selected action as a tuple (row, column)
        """

        # Use the trained actor to select a move for the current state.
        #############################
        if state[0] == 2:
            state[0] = 1
        _, next_move = self.actor.get_move(state)

        # ?: switch row and col

        row = next_move // self.board_size
        col = next_move % self.board_size
        return row, col
        ##############################

    def handle_series_start(self, unique_id, series_id, player_map, num_games, game_params):
        """
        Set the player_number of our actor, so that we can tell our MCTS which actor we are.
        :param unique_id - integer identifier for the player within the whole tournament database
        :param series_id - (1 or 2) indicating which player this will be for the ENTIRE series
        :param player_map - a list of tuples: (unique-id series-id) for all players in a series
        :param num_games - number of games to be played in the series
        :param game_params - important game parameters.  For Hex = list with one item = board size (e.g. 5)
        :return

        """

        self.series_id = series_id
        #############################
        self.board_size = game_params[0]
        self.actor = ANET(self.board_size)
        self.actor.load_anet(self.actor_tag, self.board_size, self.actor_level)
        self.actor.model.eval()

        ##############################

    def handle_game_start(self, start_player):
        """
        :param start_player: The starting player number (1 or 2) for this particular game.
        :return
        """
        self.starting_player = start_player
        #############################
        #
        #
        # YOUR CODE (if you have anything else) HERE
        #
        #
        ##############################

    def handle_game_over(self, winner, end_state):
        """
        Here you can decide how to handle what happens when a game finishes. The default action is to print the winner and
        the end state.
        :param winner: Winner ID (1 or 2)
        :param end_state: Final state of the board.
        :return:
        """
        #############################
        #
        #

        #
        #
        ##############################
        print("Game over, these are the stats:")
        print('Winner: ' + str(winner))
        print('End state: ' + str(end_state))

    def handle_series_over(self, stats):
        """
        Here you can handle the series end in any way you want; the initial handling just prints the stats.
        :param stats: The actor statistics for a series = list of tuples [(unique_id, series_id, wins, losses)...]
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("Series ended, these are the stats:")
        print(str(stats))

    def handle_tournament_over(self, score):
        """
        Here you can decide to do something when a tournament ends. The default action is to print the received score.
        :param score: The actor score for the tournament
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("Tournament over. Your score was: " + str(score))

    def handle_illegal_action(self, state, illegal_action):
        """
        Here you can handle what happens if you get an illegal action message. The default is to print the state and the
        illegal action.
        :param state: The state
        :param action: The illegal action
        :return:
        """
        #############################
        #
        #
        #
        #
        #
        #############################
        print("An illegal action was attempted:")
        print('State: ' + str(state))
        print('Action: ' + str(illegal_action))
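In handle_get_action above, the flat action index returned by the actor is converted to board coordinates with integer division and modulo. A standalone check on an assumed 6x6 board:

board_size = 6                                   # assumed board size for illustration

for index in (0, 7, 35):
    row, col = index // board_size, index % board_size
    print(index, "->", (row, col))               # 0 -> (0, 0), 7 -> (1, 1), 35 -> (5, 5)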
Code example #11
    def __init__(self, IP_address=None, verbose=True):
        self.series_id = -1
        BasicClientActorAbs.__init__(self, IP_address, verbose=verbose)
        model_file = 'src/HexPlayer/' + input("Model name (must be in src/HexPlayer/): ") + '.h5'
        self.ANET = ANET(model_file, '.')
Code example #12
class BasicClientActor(BasicClientActorAbs):

    def __init__(self, IP_address=None, verbose=True):
        self.series_id = -1
        BasicClientActorAbs.__init__(self, IP_address, verbose=verbose)
        model_file = 'src/HexPlayer/' + input("Model name (must be in src/HexPlayer/): ") + '.h5'
        self.ANET = ANET(model_file, '.')

    def handle_get_action(self, state):
        """
        Here you will use the neural net that you trained using MCTS to select a move for your actor on the current board.
        Remember to use the correct player_number for YOUR actor! The default action is to select a random empty cell
        on the board. This should be modified.
        :param state: The current board in the form (1 or 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), where
        1 or 2 indicates the number of the current player.  If you are player 2 in the current series, for example,
        then you will see a 2 here throughout the entire series, whereas player 1 will see a 1.
        :return: Your actor's selected action as a tuple (row, column)
        """

        valid_actions = Hex.get_valid_actions(state)
        next_move = self.ANET.choose_greedy(state, valid_actions)
        # If we want a random first move:
        # if sum(state[1:]) == 0:
        #     next_move = self.ANET.choose_uniform(valid_actions)
        row, column = Hex.index_to_coordinates(next_move, 6)

        return (row, column)

    def handle_series_start(self, unique_id, series_id, player_map, num_games, game_params):
        """
        Set the player_number of our actor, so that we can tell our MCTS which actor we are.
        :param unique_id - integer identifier for the player within the whole tournament database
        :param series_id - (1 or 2) indicating which player this will be for the ENTIRE series
        :param player_map - a list of tuples: (unique-id series-id) for all players in a series
        :param num_games - number of games to be played in the series
        :param game_params - important game parameters.  For Hex = list with one item = board size (e.g. 5)
        :return
        """
        self.series_id = series_id
        #############################
        #
        #
        # YOUR CODE (if you have anything else) HERE
        #
        #
        ##############################

    def handle_game_start(self, start_player):
        """
        :param start_player: The starting player number (1 or 2) for this particular game.
        :return
        """
        self.starting_player = start_player
        #############################
        #
        #
        # YOUR CODE (if you have anything else) HERE
        #
        #
        ##############################

    def handle_game_over(self, winner, end_state):
        """
        Here you can decide how to handle what happens when a game finishes. The default action is to print the winner and
        the end state.
        :param winner: Winner ID (1 or 2)
        :param end_state: Final state of the board.
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        ##############################
        print("Game over, these are the stats:")
        print('Winner: ' + str(winner) + '. We ' + ('won' if winner == self.series_id else 'lost'))
        print('End state: ' + str(end_state))

    def handle_series_over(self, stats):
        """
        Here you can handle the series end in any way you want; the initial handling just prints the stats.
        :param stats: The actor statistics for a series = list of tuples [(unique_id, series_id, wins, losses)...]
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("Series ended, these are the stats:")
        print(str(stats))

    def handle_tournament_over(self, score):
        """
        Here you can decide to do something when a tournament ends. The default action is to print the received score.
        :param score: The actor score for the tournament
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("Tournament over. Your score was: " + str(score))

    def handle_illegal_action(self, state, illegal_action):
        """
        Here you can handle what happens if you get an illegal action message. The default is to print the state and the
        illegal action.
        :param state: The state
        :param action: The illegal action
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("An illegal action was attempted:")
        print('State: ' + str(state))
        print('Action: ' + str(illegal_action))
Code example #13
class MCTS_ANET:

    def __init__(self, game, state, anet_config, size, save_interval):
        """Init MCTS-object

        :param game:
        :param state:
        """
        self.game_manager = game
        self.root_node = Node(state, None)
        self.ANET = ANET(anet_config[0], anet_config[1], anet_config[2], anet_config[3], size)
        self.RBUF = []
        self.save_interval = save_interval
        self.time = np.zeros(5)
        self.size = size

    def simulate(self, t):
        """ Run repeated tree searches within a time budget to determine a move

        :param t: time budget in seconds for the simulations
        """
        #self.time = np.zeros(5)
        start = time.time()
        self.expansion(self.root_node)
        while time.time() - start < t:
            leaf, moves = self.tree_search()
            if leaf.is_final_state:
                reward = self.evaluation(leaf, moves)
                self.backpropagation(leaf, reward)
            elif len(leaf.children) == 0:
                self.expansion(leaf)
                child = leaf.children[random.randint(0, len(leaf.children) - 1)]
                moves += 1
                reward = self.evaluation(child, moves)
                child.visits += 1
                self.backpropagation(child, reward)
            else:
                for child in leaf.children:
                    if child.visits == 0:
                        moves += 1
                        reward = self.evaluation(child, moves)
                        child.visits += 1
                        self.backpropagation(child, reward)
                        break
                if leaf.visits > len(leaf.children):
                    leaf.is_expanded = True
        distribution = np.zeros(len(self.root_node.state) - 1)
        for child in self.root_node.children:
            distribution[child.action[0]] = child.visits
        print("Dist:", distribution)
        distribution = distribution / sum(distribution)
        self.RBUF.append((self.root_node.state, distribution))
        if len(self.RBUF) > 2000:
            self.RBUF.reverse()
            new_buff = self.RBUF[:2000]
            save_RBUF(self.RBUF[2000:], self.size)
            self.RBUF = new_buff
            self.RBUF.reverse()
        #print(time.time() - start)
        #print("Tree search: {:.2}s. \nExpansion: {:.2}s. \nEvaluation: {:2f}s. \nBackpropagation {:.2}s. \nTraining {:.2}s ".format(*self.time))

        return distribution.argmax()

    def tree_search(self):
        """ Traversing the tree from the root to a leaf node by using the tree policy

        :return: (Node, moves from root to leaf.)
        """
        start = time.time()
        node = self.root_node
        node.visits += 1
        minmax = True
        moves = 0
        is_expanded = node.is_expanded

        child = get_best_child(node, True)
        if child.is_final_state:
            child.visits += 1
            return child, 1

        while is_expanded is True:
            node = get_best_child(node, minmax)
            node.visits += 1
            is_expanded = node.is_expanded
            minmax = not minmax
            moves += 1
        self.time[0] += time.time()-start
        return node, moves

    def expansion(self, leaf):
        """ Generating some or all child states of a parent state, and then connecting the tree
            node housing the parent state (a.k.a. parent node) to the nodes housing the child states (a.k.a. child
            nodes)

        :param leaf: Node, the node that is to be expanded
        """
        start = time.time()
        children = self.game_manager.get_child_action_pair(leaf.state)
        leaf.children = [Node(state, action) for state, action in children]
        for child in leaf.children:
            child.parent = leaf
            leaf.q_values[child.action] = 0
            if self.game_manager.is_win(child.state):
                child.is_final_state = True
        self.time[1] += time.time() - start

    def evaluation(self, leaf, moves, epsilon=0.2):
        """ Estimate the value of a leaf node in the tree by doing a rollout simulation using
            the default policy from the leaf node's state to a final state.

        :param leaf: Node, to be evaluated
        :param moves: int, number of moves made from the root so far
        :param epsilon: float, probability of choosing a random move during the rollout
        :return: float, reward
        """
        s = time.time()
        t = np.zeros(5)
        state = copy.deepcopy(leaf.state)
        while not self.game_manager.is_win(state):
            start = time.time()
            rand_int = random.randint(0,9)
            actions = self.game_manager.get_actions(state)
            if rand_int >= epsilon*10:
                distribution = self.ANET.distribution(state)
                action = distibution_to_action(distribution, actions)
            else:
                action = actions[random.randint(0,len(actions)-1)]
            state = self.game_manager.do_action(state, action)
            moves += 1
        self.time[2] += time.time() - s
        return -1 if moves % 2 == 0 else 1/moves

    def backpropagation(self, leaf, reward):
        """ Passing the evaluation of a final state back up the tree, updating relevant data
            at all nodes and edges on the path from the final state to the tree root.

        :param leaf: Node, leaf node the rollout was from
        :param reward: int, reward to backpropagate
        """
        start = time.time()
        leaf.reward += reward
        node = leaf
        while node.parent:
            node.parent.reward += reward
            node.parent.q_values[node.action] = node.reward / node.visits
            node = node.parent
        self.time[3] += time.time() - start

    def get_action(self):
        """ Get the next action to be performed based on q_values

        :return: action to be performed
        """
        max_val = max(self.root_node.q_values.values())
        for action, value in self.root_node.q_values.items():
            if value == max_val:
                return action

    def set_new_root(self, state):
        """
        Function to set the new root and keep the children of the new root
        """
        v = [c.visits for c in self.root_node.children]
        #print("Root visits: ", v)
        for child in self.root_node.children:
            if child.state == state:
                self.root_node = child
                self.root_node.parent = None
                break

    def reset(self, state):
        """ Reset the root node to a given state

        :param state: state to be "noded"
        """
        self.root_node = Node(state, None)

    def train(self, g):
        """
        Method to train the neural net. This implementation only selects one instance of a state
        (to avoid overfitting on the root node). The implementation can be changed to include all
        states in RBUF by commenting out the if statement.

        The network is also saved every save_interval-th iteration of g (and at g == 0).
        """
        start = time.time()
        x_train = []
        y_train = []
        rbuf_copy = copy.deepcopy(self.RBUF)
        random.shuffle(rbuf_copy)
        for root, dist in rbuf_copy:
            x_train.append(copy.deepcopy(root))
            y_train.append(copy.deepcopy(dist))
        self.ANET.train(x_train, y_train)

        if (g+1) % self.save_interval == 0:
            self.ANET.save_model(g+1)
        elif g == 0:
            self.ANET.save_model(g)
        self.time[4] += time.time() - start
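MCTS_ANET relies on a Node class and a get_best_child tree policy that are not shown above. A hypothetical minimal version with the attributes the class touches (the real project code may differ):

import math

class Node:
    """Hypothetical minimal Node with the attributes MCTS_ANET uses;
    the project's real class may differ."""

    def __init__(self, state, action):
        self.state = state
        self.action = action            # move that led to this state (None for the root)
        self.parent = None
        self.children = []
        self.q_values = {}              # action -> running value estimate
        self.visits = 0
        self.reward = 0.0
        self.is_final_state = False
        self.is_expanded = False


def get_best_child(node, maximize, c=1.0):
    """Hypothetical UCT-style tree policy: q_values plus an exploration bonus."""
    def uct(child):
        q = node.q_values.get(child.action, 0.0)
        u = c * math.sqrt(math.log(max(node.visits, 1)) / (child.visits + 1))
        return q + u if maximize else q - u

    return max(node.children, key=uct) if maximize else min(node.children, key=uct)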
Code example #14
File: train.py  Project: Siglangf/KI_2
        board_size = int(input("Board size: "))
        episodes = int(input("Episodes: "))
        num_simulations = int(input("Number of simulations: "))
        num_agents = int(input("Number of agents: "))
        batch_size = float(input("Batch size: "))
        eps = float(input("Epsilon for rollout: "))
    else:
        series_name = input("Series Name: ")
        board_size = BOARD_SIZE
        episodes = EPISODES
        num_simulations = NUM_SIMULATIONS
        num_agents = NUM_AGENTS
        batch_size = BATCH_SIZE
        eps = EPSILON

    board_actual_game = Hex(board_size)
    batch_strategy = "probability_function"

    anet = ANET(input_size=board_size,
                hidden_layers=HIDDEN_LAYERS,
                lr=LEARNING_RATE,
                activation=ACTIVATION,
                optimizer=OPTIMIZER,
                EPOCHS=EPOCHS)

    if os.path.exists(f"models/{series_name}_{board_size}_ANET_level_{0}"):
        # ! NOT TESTED YET
        anet.load_anet(series_name, board_size, episodes)
        series_name += "continued"
    train_anet(series_name, anet, board_size, board_actual_game, episodes,
               num_simulations, num_agents, batch_strategy, batch_size, eps)
Code example #15
class BasicClientActor(BasicClientActorAbs):
    def __init__(self, IP_address=None, verbose=True):
        self.series_id = -1
        self.size = 6

        self.game = Hex(6)
        self.model = ANET(0.9, (10, 15, 20), 'linear', 'sgd', self.size)
        self.model.load_model(200)

        BasicClientActorAbs.__init__(self, IP_address, verbose=verbose)

    def handle_get_action(self, state):
        """
        Here you will use the neural net that you trained using MCTS to select a move for your actor on the current board.
        Remember to use the correct player_number for YOUR actor! The default action is to select a random empty cell
        on the board. This should be modified.
        :param state: The current board in the form (1 or 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), where
        1 or 2 indicates the number of the current player.  If you are player 2 in the current series, for example,
        then you will see a 2 here throughout the entire series, whereas player 1 will see a 1.
        :return: Your actor's selected action as a tuple (row, column)
        """

        # Use the trained ANET to select a move for the current state.
        # Change which saved ANET should play here:
        state = list(state)
        self.series_id = state[0]
        distribution = self.model.distribution(state)
        actions = self.game.child_actions(state[1:], self.series_id)
        action = distibution_to_action(distribution, actions)[0]

        row = int(np.floor(action / self.size))
        col = action % self.size

        next_move = (row, col)

        return next_move

    def handle_series_start(self, unique_id, series_id, player_map, num_games,
                            game_params):
        """
        Set the player_number of our actor, so that we can tell our MCTS which actor we are.
        :param unique_id - integer identifier for the player within the whole tournament database
        :param series_id - (1 or 2) indicating which player this will be for the ENTIRE series
        :param player_map - a list of tuples: (unique-id series-id) for all players in a series
        :param num_games - number of games to be played in the series
        :param game_params - important game parameters.  For Hex = list with one item = board size (e.g. 5)
        :return

        """
        self.series_id = series_id

    def handle_game_start(self, start_player):
        """
        :param start_player: The starting player number (1 or 2) for this particular game.
        :return
        """
        self.starting_player = start_player
        #############################
        #
        #
        # YOUR CODE (if you have anything else) HERE
        #
        #
        ##############################

    def handle_game_over(self, winner, end_state):
        """
        Here you can decide how to handle what happens when a game finishes. The default action is to print the winner and
        the end state.
        :param winner: Winner ID (1 or 2)
        :param end_state: Final state of the board.
        :return:
        """

        self.game.reset_game()

        print("Game over, these are the stats:")
        print('Winner: ' + str(winner))
        print('End state: ' + str(end_state))

    def handle_series_over(self, stats):
        """
        Here you can handle the series end in any way you want; the initial handling just prints the stats.
        :param stats: The actor statistics for a series = list of tuples [(unique_id, series_id, wins, losses)...]
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("Series ended, these are the stats:")
        print(str(stats))

    def handle_tournament_over(self, score):
        """
        Here you can decide to do something when a tournament ends. The default action is to print the received score.
        :param score: The actor score for the tournament
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("Tournament over. Your score was: " + str(score))

    def handle_illegal_action(self, state, illegal_action):
        """
        Here you can handle what happens if you get an illegal action message. The default is to print the state and the
        illegal action.
        :param state: The state
        :param action: The illegal action
        :return:
        """
        #############################
        #
        #
        # YOUR CODE HERE
        #
        #
        #############################
        print("An illegal action was attempted:")
        print('State: ' + str(state))
        print('Action: ' + str(illegal_action))