Example #1
    def step(self, player_str, action_idx, next_strategy=None):
        log.debug(
            paint("\n----------- Performing GAME step -----------",
                  bcolors.REF))
        action_str = self.game_def.encoder.all_actions[action_idx]

        try:
            legal_action = self.current_state.get_legal_action_from_str(
                action_str)
            log.debug(legal_action)
        except RuntimeError:  # Accounts for an illegal action
            log.debug(paint("\tSelected illegal action", bcolors.FAIL))
            player_reward = -100
            log.debug(
                paint_bool(
                    "••••••• EPISODE FINISHED Reward:{} •••••••".format(
                        player_reward), player_reward > 0))
            return self.current_observation, player_reward, True, {}
        #Construct next state
        self.match.add_step(
            Step(self.current_state, legal_action, len(self.match.steps)))
        next_state = self.current_state.get_next(legal_action,
                                                 strategy_path=next_strategy)
        self.current_state = next_state

        #Get information from next state
        done = self.current_state.is_terminal
        goals_dic = self.current_rewards
        player_reward = goals_dic[player_str]
        if done:
            log.debug(
                paint_bool(
                    "••••••• EPISODE FINISHED Reward:{} •••••••".format(
                        player_reward), player_reward > 0))
        return self.current_observation, player_reward, done, {}
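Since step follows the Gym convention of returning (observation, reward, done, info), driving it in a loop is straightforward. The sketch below is illustrative only: the env and agent objects and the player label "a" are assumptions, not part of the source.

# Hypothetical driver loop; `env` wraps the environment above and `agent`
# is any policy mapping an observation to an action index (assumptions).
obs = env.current_observation
done = False
while not done:
    action_idx = agent.act(obs)  # index into game_def.encoder.all_actions
    obs, reward, done, info = env.step("a", action_idx)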
Example #2
    def random_reset(self):
        n_init = len(self.possible_initial_states)
        initial_idx = np.random.randint(n_init)
        self.game_def.initial = self.possible_initial_states[initial_idx]
        initial_state = self.game_def.get_initial_state()
        self.current_state = initial_state
        self.match = Match([Step(initial_state, None, 0)])
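random_reset samples one of the possible initial states uniformly with np.random.randint and rebuilds the match from it, so calling it before each episode varies the starting position. A minimal sketch of how it might sit in a training loop; env, agent, and n_episodes are hypothetical names:

# Hypothetical per-episode reset; all names here are assumptions.
for episode in range(n_episodes):
    env.random_reset()  # uniform over env.possible_initial_states
    obs, done = env.current_observation, False
    while not done:
        obs, reward, done, _ = env.step("a", agent.act(obs))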
Example #3
    def run_episode(game_def, net, expl=0.3):
        """
        Runs one episode to generate examples.
        The full episode will run a MCTS simulation in the root
        if the tree, the choose the best action with the computed
        probabilities and add this as one example. The process will
        then be repeated with the new node until a terminal node in reached. It will generate as many examples as steps in the match.
        """
        examples = []
        state = game_def.get_initial_state()

        root = TreeZero.node_class(Step(state, None, 0), "a")

        current_state = state
        is_first = True
        j = 0
        while True:
            j += 1
            if not is_first:
                current_state = root.step.next_state()
            else:
                current_state = state
                is_first = False
            root = TreeZero.node_class(Step(current_state, None, 0), "a")
            tree = TreeZero(root, game_def, net)
            tree.run_mcts(net.args.n_mcts_simulations, expl=expl)
            # if j == 1: tree.print_in_file("train-{}.png".format(j))
            if root.step.state.is_terminal:
                # Terminal state: record a final example and back-propagate
                # the outcome, flipping the sign for the alternating players
                examples.append((root.step.state,
                                 [game_def.encoder.mask_state(current_state),
                                  np.zeros(game_def.encoder.action_size),
                                  None]))
                goals = root.step.state.goals
                v = goals[root.step.state.control]
                for s, e in examples[::-1]:
                    e[2] = v
                    # print("Example: \n{}\n{}\n".format(s, v))
                    v = -v
                return [e[1] for e in examples]

            pi = root.pis(game_def)
            examples.append((current_state,
                             [game_def.encoder.mask_state(current_state),
                              pi, None]))
            a = np.random.choice(game_def.encoder.all_actions, p=pi)
            root = [n for n in root.children
                    if str(n.step.action.action) == str(a)][0]
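run_episode returns one [masked_state, pi, v] triple per step of the match, with the value v signed from the perspective of the player in control (hence the v = -v flip when walking the examples backwards). A hedged sketch of collecting self-play data over several episodes; n_episodes is an assumption:

# Hypothetical self-play data collection; n_episodes is an assumption.
all_examples = []
for _ in range(n_episodes):
    all_examples.extend(run_episode(game_def, net, expl=0.3))
# Each element is [masked_state, pi, v], usable as (input, policy target,
# value target) when training the network.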
Example #4
    def build(game_def, args):
        """
        Runs the required computation to build a player. For instance, creating a tree or 
        training a model. 
        The computed information should be stored to be accessed latter on using the name_style
        Args:
            game_def (GameDef): The game definition used for the creation
            args (NameSpace): A name space with all the attributes defined in add_parser_build_args
        """
        if 'first_build' not in args:
            log.debug("Creating new files")
            new_files = 'w'
            args.first_build = False
        else:
            log.debug("Appending to existing files")
            new_files = 'a'

        log.debug("Computing mcts for tree")
        state = game_def.get_initial_state()
        root = TreeMCTS.node_class(Step(state, None, 0), args.main_player)
        tree = TreeMCTS(root, game_def, args.main_player)
        tree.run_mcts(args.iter)
        t0 = time.time()
        if args.tree_image_file_name is not None:
            file_name = '{}/{}'.format(game_def.name,
                                       args.tree_image_file_name)
            tree.print_in_file(file_name=file_name)
            log.debug("Tree image saved in {}".format(file_name))
        n_nodes = tree.get_number_of_nodes()
        if args.tree_name is not None:
            file_path = "./approaches/mcts/trees/{}/{}".format(
                game_def.name, args.tree_name)
            tree.save_values_in_file(file_path)
            log.debug("Tree saved in {}".format(file_path))
        if args.train_file is not None:
            file_path = "./approaches/mcts/train/{}/{}".format(
                game_def.name, args.train_file)
            train_list = tree.get_train_list()
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            training_data_to_csv(file_path,
                                 train_list,
                                 game_def,
                                 new_files,
                                 extra_array=['p', 'n'])
            log.debug("Training data saved in {}".format(file_path))

        t1 = time.time()
        save_time = round((t1 - t0) * 1000, 3)
        return {'number_of_nodes': n_nodes, 'save_time': save_time}
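build reads iter, main_player, tree_image_file_name, tree_name, and train_file from args, and tracks first_build across repeated calls to decide between write and append mode. A minimal sketch of invoking it with an argparse.Namespace; the concrete values are placeholders:

from argparse import Namespace

# Placeholder values; only the attribute names are taken from build above.
args = Namespace(iter=1000,
                 main_player="a",
                 tree_image_file_name=None,  # skip the tree image
                 tree_name="my_tree",        # hypothetical file name
                 train_file="train.csv")     # hypothetical file name
stats = build(game_def, args)
log.debug("{} nodes, saved in {} ms".format(stats['number_of_nodes'],
                                            stats['save_time']))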
Example #5
def expand_root(tree, main_player="a"):
    """
    Function to expand a tree downwards until terminal leaves in place

    Args:
        tree (anytree.Node): a tree to expand till its terminal leaves
    """
    disable_tqdm = log.is_disabled_for('debug')
    valid_moves = tree.step.state.legal_actions
    for legal_action in valid_moves:
        step = Step(tree.step.state, legal_action, 1)
        TreeMinmax.node_class(step, main_player, parent=tree)
    expand_further = True
    time_step = 2
    while expand_further:
        # define current player
        expand_further = False
        # starting iteration to fill branches
        log.debug("Depth: %s" % (time_step))
        for leaf in tqdm(tree.leaves, disable=disable_tqdm):
            current_state = leaf.step.state
            if current_state.is_terminal:
                continue

            next_state = current_state.get_next(leaf.step.action)
            valid_moves = next_state.legal_actions
            for legal_action in valid_moves:
                step = Step(next_state, legal_action, time_step)
                TreeMinmax.node_class(step, main_player, parent=leaf)
                expand_further = True

            if next_state.is_terminal:
                goals = next_state.goals
                leaf.score = goals[main_player]

        time_step += 1
Example #6
def minmax_from_game_def(game_def, initial_state=None, main_player="a"):
    """
    Wrapper function to start with a game definition and expand
    root downwards to branch all possibilities. Next, the tree is
    reviewed upwards using the minimax algorithm.

    Args:
        game_def (GameDef*): game definition class
    """
    if initial_state is None:
        initial_state = StateExpanded.from_game_def(game_def, game_def.initial)
    tree = TreeMinmax(main_player=main_player)
    root_node = tree.create_node(Step(initial_state, None, 0))

    # Tree.expand_rec(root_node,0)
    expand_root(tree=root_node, main_player=main_player)
    root_node = build_minimax(root_node, main_player=main_player)
    return TreeMinmax(root_node, main_player)
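A sketch of how the returned tree might be queried for the best opening move. Note that get_best_action and the root attribute are only shown for TreeMCTS in Example #8; assuming TreeMinmax mirrors that interface is a guess:

# Hypothetical usage; get_best_action on TreeMinmax is an assumed API,
# mirroring the TreeMCTS call in Example #8.
tree = minmax_from_game_def(game_def, main_player="a")
best_action = tree.get_best_action(tree.root)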
Example #7
def match_from_time_model(model, game_def, main_player=None):
    """
    Given a stable model for the full time representation of the game,
    this function creates a match with each action taken.

    Args:
        model: Stable model from the full time representation
        game_def: The game definition
        main_player: The player for which we aim to minmax
    """
    atoms = model.symbols(atoms=True)
    fluent_steps = defaultdict(lambda: {'fluents':[],'goals':[],
                                        'action':None})
    for a in atoms:
        if a.name == "goal":
            time = a.arguments[2].number
            fluent_steps[time]['goals'].append(a)
        elif a.name == "holds":
            time = a.arguments[1].number
            fluent_steps[time]['fluents'].append(a.arguments[0])
        elif a.name == "does":
            time = a.arguments[2].number
            fluent_steps[time]['action'] = a
    fluent_steps = dict(fluent_steps)
    steps = []
    for i in range(len(fluent_steps)):
        state = State(fluent_steps[i]['fluents'], fluent_steps[i]['goals'],
                      game_def)
        action = None
        if fluent_steps[i]['action']:
            action = Action(fluent_steps[i]['action'].arguments[0].name,
                            fluent_steps[i]['action'].arguments[1])
        step = Step(state, action, i)
        steps.append(step)
    steps[-1].state.is_terminal = True
    # steps[-1].set_score_player(main_player)
    # steps[-2].set_score_player(main_player)
    # steps[-2].state.goals = steps[-1].state.goals
    # steps =steps[:-1]
    
    return Match(steps)
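match_from_time_model expects a clingo stable model containing goal/3, holds/2, and does/3 atoms, which it groups by their time argument. A hedged sketch of feeding it models through clingo's Control API; the encoding file name is a placeholder:

import clingo

# "full_time.lp" is a placeholder for the full-time game encoding.
ctl = clingo.Control()
ctl.load("full_time.lp")
ctl.ground([("base", [])])

matches = []
# The model is only valid inside the callback, so the match is built there.
ctl.solve(on_model=lambda m: matches.append(match_from_time_model(m, game_def)))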
Example #8
    def choose_action(self, state):
        """
        The player chooses an action given a current state.

        Args:
            state (State): The current state
        
        Returns:
            action (Action): The selected action. Should be one from the list of state.legal_actions
        """
        step = Step(state, None, 0)

        tree = TreeMCTS(TreeMCTS.node_class(step, self.main_player),
                        self.game_def, self.main_player)
        try:
            tree.run_mcts(10000)
        except TimeoutError:
            log.debug("Reached timeout error for mcts, computation will stop")

        action = tree.get_best_action(tree.root)
        action_ex = [l_a for l_a in state.legal_actions if l_a == action][0]
        return action_ex
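In a match this player is queried once per turn with the current state. A minimal sketch of such a loop; the MCTSPlayer class name wrapping the method above is an assumption:

# Hypothetical turn loop; MCTSPlayer is an assumed wrapper class name.
player = MCTSPlayer(game_def, main_player="a")
state = game_def.get_initial_state()
while not state.is_terminal:
    action = player.choose_action(state)
    state = state.get_next(action)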
Example #9
    def simulate(game_def,
                 players,
                 depth=None,
                 ran_init=False,
                 signal_on=True):
        """
        Call it with the path to the game definition

        Args:
            players (Player,Player): A tuple of the players

            depth:
                - n: Generate until depth n or terminal state reached
        """
        def handler(signum, frame):
            raise TimeoutError("Action time out")

        if signal_on: signal.signal(signal.SIGALRM, handler)
        if ran_init:
            initial = game_def.get_random_initial()
        else:
            initial = game_def.initial
        state = StateExpanded.from_game_def(game_def,
                                            initial,
                                            strategy=players[0].strategy)
        match = Match([])
        time_step = 0
        continue_depth = True if depth is None else time_step < depth
        log.debug("\n--------------- Simulating match ----------------")
        log.debug("\na: {}\nb: {}\n".format(players[0].name, players[1].name))

        letters = ['a', 'b']
        response_times = {'a': [], 'b': []}
        while not state.is_terminal and continue_depth:
            if signal_on: signal.alarm(3)
            t0 = time.time()
            try:
                selected_action = players[time_step % 2].choose_action(state)
            except TimeoutError:
                log.info(
                    "Time out for player {}, choosing random action".format(
                        letters[time_step % 2]))
                index = randint(0, len(state.legal_actions) - 1)
                selected_action = state.legal_actions[index]
            if signal_on: signal.alarm(0)
            t1 = time.time()
            response_times[letters[time_step % 2]].append(
                round((t1 - t0) * 1000, 3))
            step = Step(state, selected_action, time_step)
            match.add_step(step)
            time_step += 1
            continue_depth = True if depth is None else time_step < depth
            state = state.get_next(selected_action,
                                   strategy_path=players[time_step %
                                                         2].strategy)
        match.add_step(Step(state, None, time_step))
        log.debug(match)
        return match, {
            k: round(sum(lst) / (len(lst) if len(lst) > 0 else 1), 3)
            for k, lst in response_times.items()
        }
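The SIGALRM-based timeout means this function is Unix-only. A sketch of a typical call; player_a and player_b stand for any objects exposing name, strategy, and choose_action as used above:

# Hypothetical call; player_a and player_b are assumptions.
match, avg_response_ms = simulate(game_def, (player_a, player_b),
                                  depth=20, ran_init=True)
log.debug(avg_response_ms)  # e.g. {'a': 12.3, 'b': 45.6}, in milliseconds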
Example #10
    def generate_from(cls, game_def, net, state):
        """
        Generates a tree with the predictions of the network.
        It will generate as children all the legal actions, as well as any
        illegal actions with higher probability than the legal ones, but
        will only expand legal actions further.
        """
        log.debug("Generating net tree...")
        root = TreeNet.node_class(Step(state,None,0),"a",dic={"is_legal":1,"p":1,"v":0})
        tree = TreeNet(root,game_def,net)
        set_visited = set()
        current_nodes = [root]
        it = 0
        while(len(current_nodes)>0):
            it+=1
            new_nodes = []
        
            for n in current_nodes:
                s = n.step.state
                if s.is_terminal:
                    #Dont expand terminal nodes
                    continue
                if not n.is_legal:
                    #Dont expand illegal moves
                    continue
                if n.step.action is None:
                    #Case for root node
                    state = n.step.state
                else:
                    state = n.step.next_state()
                if state.is_terminal:
                    pi, v = net.predict_pi_v(state)
                    n.v=v
                    continue
                pi, v = net.predict_pi_v(state)
                n.v=v
                legal_actions_masked = game_def.encoder.mask_legal_actions(state)
                
                illegal_print_th = 0.001 #Only illegal states with more than this amout in diference will be printed
                general_print_th = 0.001 #Only states with at least this prob will be printed
                max_prob = pi[np.argmax(legal_actions_masked*pi)]+illegal_print_th

                
                for i,p in enumerate(pi):
                    if p<general_print_th:
                        continue
                
                    if p<=max_prob and legal_actions_masked[i]==0:
                        continue
                    
                    action_str= str(game_def.encoder.all_actions[i])
                    if legal_actions_masked[i]==0:
                        action = Action.from_facts("does({},{}).".format(state.control,action_str),game_def)
                    else:
                        action = state.get_legal_action_from_str(action_str) 
                    
                    step = Step(state,action,n.step.time_step+1)
                    step_hash = step.__hash__()
                    if step_hash in set_visited:
                        continue
                    node = TreeNet.node_class(step,"a",parent=n,dic={"is_legal":legal_actions_masked[i]==1,"p":p,"v":0})
                    new_nodes.append(node)
                    set_visited.add(step_hash)
            current_nodes = new_nodes
        return tree
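A sketch of generating and inspecting the network tree. print_in_file is shown for TreeMCTS in Example #4; assuming TreeNet shares it is a guess:

# Hypothetical inspection; print_in_file on TreeNet is an assumed API,
# mirroring the TreeMCTS call in Example #4.
state = game_def.get_initial_state()
net_tree = TreeNet.generate_from(game_def, net, state)
net_tree.print_in_file(file_name="{}/net_tree.png".format(game_def.name))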
Example #11
    def simulate(game_def,
                 players,
                 depth=None,
                 time_out_sec=None,
                 penalize_illegal=False):
        """
        Call it with the path to the game definition

        Args:
            players (Player,Player): A tuple of the players

            depth: Generate until depth or terminal state reached
            time_out_sec: The number of seconds the player will have to make a move
            penalize_illegal: True if a selection of an illegal action should be highly
                              penalized by the player
        """
        signal_on = not time_out_sec is None

        def handler(signum, frame):
            raise TimeoutError("Action time out")

        if signal_on: signal.signal(signal.SIGALRM, handler)
        initial = game_def.initial
        state = StateExpanded.from_game_def(game_def,
                                            initial,
                                            strategy=players[0].strategy)
        match = Match([])
        time_step = 0
        continue_depth = True if depth is None else time_step < depth
        log.debug("\n--------------- Simulating match ----------------")
        log.debug("\na: {}\nb: {}\n".format(players[0].name, players[1].name))

        letters = ['a', 'b']
        response_times = {'a': [], 'b': []}
        while not state.is_terminal and continue_depth:
            current_control = letters[time_step % 2]
            if signal_on: signal.alarm(time_out_sec)
            t0 = time.time()
            try:
                selected_action = players[time_step % 2].choose_action(
                    state,
                    time_step=time_step,
                    penalize_illegal=penalize_illegal)
            except TimeoutError:
                log.debug(
                    "Time out for player {}, choosing random action".format(
                        current_control))
                index = randint(0, len(state.legal_actions) - 1)
                selected_action = state.legal_actions[index]
            except IllegalActionError as ex:
                log.debug(
                    "Player {}, choosing illegal action {} in step {} -> Match lost"
                    .format(players[time_step % 2].name, str(ex.action),
                            time_step))
                state.is_terminal = True
                state.goals = {
                    current_control: -1,
                    letters[(time_step + 1) % 2]: +1,
                }
                selected_action = None
                match.illegal_lost = {
                    "player": current_control,
                    "time_step": time_step
                }
            if signal_on: signal.alarm(0)
            t1 = time.time()
            response_times[current_control].append(round((t1 - t0) * 1000, 3))
            step = Step(state, selected_action, time_step)
            match.add_step(step)
            time_step += 1
            continue_depth = True if depth is None else time_step < depth
            if selected_action is not None:
                state = state.get_next(selected_action,
                                       strategy_path=players[time_step %
                                                             2].strategy)
        match.add_step(Step(state, None, time_step))
        log.debug(match)
        return match, {
            k: round(sum(lst) / (len(lst) if len(lst) > 0 else 1), 3)
            for k, lst in response_times.items()
        }
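This variant adds a per-move timeout and can end the match immediately when a player raises IllegalActionError, scoring the offender -1 and the opponent +1. A sketch of such a call; the player objects are again assumptions:

# Hypothetical call with a 3-second move budget; player_a and player_b
# are assumptions for illustration.
match, avg_response_ms = simulate(game_def, (player_a, player_b),
                                  time_out_sec=3, penalize_illegal=True)
if hasattr(match, "illegal_lost"):
    log.debug("Lost by illegal action: {}".format(match.illegal_lost))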