Esempio n. 1
0
    def step(self, player_str, action_idx, next_strategy=None):
        """
        Apply one action to the current game state and report the outcome.

        Args:
            player_str (str): Key used to read this player's reward from
                              self.current_rewards
            action_idx (int): Index into self.game_def.encoder.all_actions
            next_strategy: Forwarded as strategy_path when computing the
                           next state

        Returns:
            tuple: (observation, reward, done, info); info is always an empty
                   dict. When the chosen action is rejected with a
                   RuntimeError the episode ends with a reward of -100 and
                   the current state is left unchanged.
        """
        finished_template = "••••••• EPISODE FINISHED Reward:{} •••••••"
        log.debug(
            paint("\n----------- Performing GAME step -----------",
                  bcolors.REF))
        chosen_str = self.game_def.encoder.all_actions[action_idx]

        try:
            legal_action = self.current_state.get_legal_action_from_str(
                chosen_str)
            log.debug(legal_action)
        except RuntimeError:
            # A RuntimeError here is treated as an illegal action selection
            log.debug(paint("\tSelected non legal action", bcolors.FAIL))
            penalty = -100
            log.debug(paint_bool(finished_template.format(penalty),
                                 penalty > 0))
            return self.current_observation, penalty, True, {}

        # Record the step that is being played, then advance the state
        played = Step(self.current_state, legal_action, len(self.match.steps))
        self.match.add_step(played)
        self.current_state = self.current_state.get_next(
            legal_action, strategy_path=next_strategy)

        # Read the outcome from the new state
        done = self.current_state.is_terminal
        player_reward = self.current_rewards[player_str]
        if done:
            log.debug(
                paint_bool(finished_template.format(player_reward),
                           player_reward > 0))
        return self.current_observation, player_reward, done, {}
Esempio n. 2
0
    def choose_action(self, state):
        """
        Select the action recorded in the score tree for the given state.

        Args:
            state (State): The current state

        Returns:
            action (Action): One of state.legal_actions. A random legal action
                             is returned when the tree has no entry for the
                             state.
        """
        facts = state.to_facts()

        if facts not in self.tree_scores:
            log.debug(
                "Minmax has no information in tree for current step, choosing random"
            )
            index = randint(0, len(state.legal_actions) - 1)
            return state.legal_actions[index]

        scored_actions = self.tree_scores[facts].items()
        # Maximize when the scores belong to this player, otherwise minimize
        pick = max if self.scores_main_player == self.main_player else min
        best = pick(scored_actions, key=lambda pair: pair[1])
        wanted = Action.from_facts(best[0], self.game_def)

        # Return the legal action of the state that equals the tree's choice
        matching = [l_a for l_a in state.legal_actions if l_a == wanted]
        return matching[0]
Esempio n. 3
0
    def choose_action(self, state):
        """
        Select an action according to the configured style of the player.

        - "tree": look the state up in the stored score tree (random legal
          action when the state is not in the tree)
        - "rule": return the last entry of state.legal_actions
        - anything else: run the minmax computation from the current state,
          keeping the rules learned along the way

        Args:
            state (State): The current state

        Returns:
            action (Action): The selected action, taken from state.legal_actions

        Raises:
            TimeoutError: When the minmax computation yields no match
        """
        if self.style == "tree":
            facts = state.to_facts()
            if facts not in self.tree_scores:
                log.debug(
                    "Minmax has no information in tree for current step, choosing random"
                )
                index = randint(0, len(state.legal_actions) - 1)
                return state.legal_actions[index]

            scored_actions = self.tree_scores[facts].items()
            # Maximize when the scores belong to this player, otherwise minimize
            pick = max if self.scores_main_player == self.main_player else min
            best = pick(scored_actions, key=lambda pair: pair[1])
            wanted = Action.from_facts(best[0], self.game_def)
            return [l_a for l_a in state.legal_actions if l_a == wanted][0]

        if self.style == "rule":
            return state.legal_actions[-1]

        print("Learning")
        # Compute minmax from this state, reusing the rules learned so far
        initial = fluents_to_asp_syntax(state.fluents, 0)
        match, _tree, _examples, new_rules, _training = get_minmax_init(
            self.game_def,
            self.main_player,
            initial,
            extra_fixed="\n".join(self.learned),
            learning_rules=True)
        self.learned.extend(new_rules)
        if len(new_rules) > 0:
            log.info("{} learned new rules during game play".format(
                self.name))
        if match is None:
            raise TimeoutError

        # Map the first action of the computed match onto a legal action by name
        action_name = str(match.steps[0].action.action)
        same_name = [
            l_a for l_a in state.legal_actions
            if str(l_a.action) == action_name
        ]
        return same_name[0]
Esempio n. 4
0
 def from_name(cls, name, initial=None, constants=None):
     """
     Creates a game definition of the subclass whose folder matches the
     argument 'name'
     Args:
         name: the name of the folder containing the class definition
         initial: an optional initial state
         constants: the constants to be passed to clingo for this game;
                    an empty dict is used when omitted
     Returns:
         An instance of the matching subclass, built as
         subclass(name, initial, constants)
     Raises:
         RuntimeError: when no known game definition matches 'name'
     """
     if constants is None:
         # Build a fresh dict per call: a `{}` default would be one shared
         # object handed to every game definition created without constants
         constants = {}
     games = game_def_sub_classes()
     if name in games:
         log.debug("Creating game definition from class {}".format(games[name].__name__))
         return games[name](name, initial, constants)

     raise RuntimeError("No folder inside game_definitions matched {}".format(name))
Esempio n. 5
0
    def from_name_style(cls, game_def, name_style, main_player):
        """
        Build a player from the first Player subclass that accepts name_style.

        Args:
            game_def (GameDef): The game_definition used
            name_style (str): String handed to match_name_style of each
                              candidate subclass
            main_player (str): String with the name of the main player

        Returns:
            The player created as subclass(game_def, name_style, main_player)

        Raises:
            RuntimeError: When no subclass matches name_style
        """
        for candidate in player_approaches_sub_classes().values():
            if not candidate.match_name_style(name_style):
                continue
            log.debug("Creating player from class {}".format(
                candidate.__name__))
            return candidate(game_def, name_style, main_player)
        else:
            raise RuntimeError(
                "No subclass of Player matched {}".format(name_style))
Esempio n. 6
0
 def build(game_def, args):
     """
     Computes the full minmax tree for the game and optionally stores it.

     Args:
         game_def (GameDef): The game definition used for the creation
         args (NameSpace): A name space with all the attributes defined in add_parser_build_args.
                           Reads main_player, tree_image_file_name and tree_name.
     Returns:
         dict: 'number_of_nodes' of the tree and 'save_time' in milliseconds
               (only the time spent after the tree was computed)
     """
     log.debug("Computing normal minmax for tree")
     tree = minmax_from_game_def(game_def, main_player=args.main_player)

     started = time.time()
     if args.tree_image_file_name is not None:
         image_name = '{}/{}'.format(game_def.name,
                                     args.tree_image_file_name)
         tree.print_in_file(file_name=image_name)
         log.debug("Tree image saved in {}".format(image_name))

     n_nodes = tree.get_number_of_nodes()

     if args.tree_name is not None:
         scores_path = "./approaches/minmax/trees/{}/{}".format(
             game_def.name, args.tree_name)
         tree.save_scores_in_file(scores_path)
         log.debug("Tree saved in {}".format(scores_path))

     elapsed_ms = round((time.time() - started) * 1000, 3)
     return {'number_of_nodes': n_nodes, 'save_time': elapsed_ms}
Esempio n. 7
0
    def choose_action(self, state):
        """
        Run a Monte Carlo tree search from the given state and play its best action.

        Args:
            state (State): The current state

        Returns:
            action (Action): The legal action of the state that equals the
                             best action reported by the search tree
        """
        root = TreeMCTS.node_class(Step(state, None, 0), self.main_player)
        tree = TreeMCTS(root, self.game_def, self.main_player)
        try:
            tree.run_mcts(10000)
        except TimeoutError:
            # Keep going with whatever the search produced before the timeout
            log.debug("Reached timeout error for mcts, computation will stop")

        best = tree.get_best_action(tree.root)
        matching = [l_a for l_a in state.legal_actions if l_a == best]
        return matching[0]
Esempio n. 8
0
def build_minimax(tree, main_player="a"):
    """
    Function to review and annotate tree with minimax scores

    Nodes that already carry a score are left untouched; every other visited
    node receives the max (when its first child's state is controlled by
    main_player) or the min of its children's scores.

    Args:
        tree (anytree.Node): a tree with scores on its leaves
        main_player (str): The player to maximize
    Returns:
        tree (anytree.Node): minimax-annotated version of tree
    """
    disable_tqdm = log.is_disabled_for('debug')
    log.debug("tracking minimax scores recursively")
    # Work backwards from the deepest visited level so that children are
    # scored before their parents. NOTE(review): maxlevel=height appears to
    # leave out the deepest level, which is expected to be scored already.
    bottom_up = list(LevelOrderIter(tree.root, maxlevel=tree.root.height))
    bottom_up.reverse()
    for node in tqdm(bottom_up, disable=disable_tqdm):
        if node.score is not None:
            # Already scored; no need to collect the children's scores
            continue
        scores = [child.score for child in node.children if child != ()]
        if node.children[0].step.state.control == main_player:
            node.score = max(scores)
        else:
            node.score = min(scores)
    return tree
Esempio n. 9
0
def expand_root(tree, main_player="a"):
    """
    Function to expand a tree downwards until terminal leaves in place

    The tree is modified in place: one child node is attached per legal
    action, level by level, until a pass over the leaves adds no new node.
    A leaf whose action leads to a terminal state gets its score set to the
    goal value of main_player in that state. Nothing is returned.

    Args:
        tree (anytree.Node): a tree to expand till its terminal leaves
        main_player (str): passed to every created node and used as the key
                           to read the score from the goals of terminal states
    """
    disable_tqdm = log.is_disabled_for('debug')
    # First level: one child per legal action of the root state
    valid_moves = tree.step.state.legal_actions
    for legal_action in valid_moves:
        step = Step(tree.step.state, legal_action, 1)
        TreeMinmax.node_class(step, main_player, parent=tree)
    expand_further = True
    time_step = 2
    while expand_further:
        # Stays False unless at least one node is added during this pass
        expand_further = False
        # starting iteration to fill branches
        log.debug("Depth: %s" % (time_step))
        for leaf in tqdm(tree.leaves, disable=disable_tqdm):
            current_state = leaf.step.state
            if current_state.is_terminal:
                continue

            # State reached by playing the leaf's action
            next_state = current_state.get_next(leaf.step.action)
            valid_moves = next_state.legal_actions
            for legal_action in valid_moves:
                step = Step(next_state, legal_action, time_step)
                TreeMinmax.node_class(step, main_player, parent=leaf)
                expand_further = True

            if next_state.is_terminal:
                # The leaf's action ends the game: score it with the main player's goal
                goals = next_state.goals
                leaf.score = goals[main_player]

        time_step += 1
Esempio n. 10
0
    def build(game_def, args):
        """
        Runs a Monte Carlo tree search from the initial state and optionally
        stores the tree image, the tree values and the training data.

        The first call on a given args opens files in write mode and marks
        args with 'first_build'; later calls append.

        Args:
            game_def (GameDef): The game definition used for the creation
            args (NameSpace): A name space with all the attributes defined in add_parser_build_args
        Returns:
            dict: 'number_of_nodes' of the tree and 'save_time' in milliseconds
                  (only the time spent after the search finished)
        """
        if 'first_build' in args:
            log.debug("Appending to existent files")
            new_files = 'a'
        else:
            log.debug("Creating new files")
            new_files = 'w'
            args.first_build = False

        log.debug("Computing mcts for tree")
        initial_step = Step(game_def.get_initial_state(), None, 0)
        root = TreeMCTS.node_class(initial_step, args.main_player)
        tree = TreeMCTS(root, game_def, args.main_player)
        tree.run_mcts(args.iter)

        started = time.time()
        if args.tree_image_file_name is not None:
            image_name = '{}/{}'.format(game_def.name,
                                        args.tree_image_file_name)
            tree.print_in_file(file_name=image_name)
            log.debug("Tree image saved in {}".format(image_name))

        n_nodes = tree.get_number_of_nodes()

        if args.tree_name is not None:
            values_path = "./approaches/mcts/trees/{}/{}".format(
                game_def.name, args.tree_name)
            tree.save_values_in_file(values_path)
            log.debug("Tree saved in {}".format(values_path))

        if args.train_file is not None:
            train_path = "./approaches/mcts/train/{}/{}".format(
                game_def.name, args.train_file)
            train_rows = tree.get_train_list()
            os.makedirs(os.path.dirname(train_path), exist_ok=True)
            training_data_to_csv(train_path,
                                 train_rows,
                                 game_def,
                                 new_files,
                                 extra_array=['p', 'n'])
            log.debug("Training data saved in {}".format(train_path))

        elapsed_ms = round((time.time() - started) * 1000, 3)
        return {'number_of_nodes': n_nodes, 'save_time': elapsed_ms}
Esempio n. 11
0
    def build(game_def, args):
        """
        Runs the asp minmax computation from the initial state and stores the
        outputs that were requested through args: ILASP examples, learned
        rules, training data, the tree image and the tree scores.

        The first call on a given args opens files in write mode and marks
        args with 'first_build'; later calls append.

        Args:
            game_def (GameDef): The game definition used for the creation
            args (NameSpace): A name space with all the attributes defined in add_parser_build_args
        Returns:
            dict: 'number_of_nodes' of the tree and 'save_time' in milliseconds
                  (only the time spent after the minmax computation)
        """
        if 'first_build' in args:
            log.debug("Appending to existent files")
            new_files = 'a'
        else:
            log.debug("Creating new files")
            new_files = 'w'
            args.first_build = False

        # Each optional output is enabled by giving its file name
        learn_examples = args.ilasp_examples_file_name is not None
        learn_rules = args.rules_file_name is not None
        generate_train = args.train_file_name is not None

        log.debug("Computing asp minmax for tree")
        log.debug("Initial state: \n{}".format(
            game_def.get_initial_state().ascii))
        initial = game_def.get_initial_time()
        computed = get_minmax_init(game_def,
                                   args.main_player,
                                   initial,
                                   generating_training=generate_train,
                                   learning_rules=learn_rules,
                                   learning_examples=learn_examples)
        minmax_match, min_max_tree, examples, learned_rules, training_list = computed
        log.debug(minmax_match)

        started = time.time()

        if learn_examples:
            examples_path = './approaches/ilasp/{}/examples/{}'.format(
                args.game_name, args.ilasp_examples_file_name)
            os.makedirs(os.path.dirname(examples_path), exist_ok=True)
            with open(examples_path, new_files) as examples_out:
                examples_out.write("\n".join(examples))
                log.debug("ILASP Examples saved in " + examples_path)

        if learn_rules:
            rules_path = './approaches/pruned_minmax/rules/{}/{}'.format(
                args.game_name, args.rules_file_name)
            os.makedirs(os.path.dirname(rules_path), exist_ok=True)
            with open(rules_path, new_files) as rules_out:
                rules_out.write("\n".join(learned_rules))
            rules_file_to_gdl(rules_path)
            log.debug("Rules saved in " + rules_path)

        if generate_train:
            train_path = './approaches/ml_agent/train/{}/{}'.format(
                args.game_name, args.train_file_name)
            os.makedirs(os.path.dirname(train_path), exist_ok=True)
            training_data_to_csv(train_path, training_list, game_def,
                                 new_files)
            log.debug("Training data saved in " + train_path)
            remove_duplicates_training(train_path)

        if args.tree_image_file_name is not None:
            image_name = '{}/{}'.format(args.game_name,
                                        args.tree_image_file_name)
            min_max_tree.print_in_file(file_name=image_name)

        n_nodes = min_max_tree.get_number_of_nodes()

        if args.tree_name is not None:
            scores_path = "./approaches/pruned_minmax/trees/{}/{}".format(
                game_def.name, args.tree_name)
            min_max_tree.save_scores_in_file(scores_path)
            log.debug("Tree saved in {}".format(scores_path))

        elapsed_ms = round((time.time() - started) * 1000, 3)

        return {'number_of_nodes': n_nodes, 'save_time': elapsed_ms}
Esempio n. 12
0
    def simulate(game_def,
                 players,
                 depth=None,
                 ran_init=False,
                 signal_on=True,
                 time_out_sec=3):
        """
        Simulates a match between two players, alternating turns starting
        with players[0], until a terminal state or the depth limit is reached.

        Args:
            game_def (GameDef): The game definition to play
            players (Player,Player): A tuple of the players

            depth:
                - n: Generate until depth n or terminal state reached
                - None: Generate until a terminal state is reached
            ran_init (bool): Start from game_def.get_random_initial() instead
                             of game_def.initial
            signal_on (bool): Limit the time of every move with SIGALRM
            time_out_sec (int): Seconds a player has to make a move when
                                signal_on is set

        Returns:
            (Match, dict): The match played, closed by a final step without
                           action, and the average response time in
                           milliseconds of each player ('a' and 'b')
        """
        def handler(signum, frame):
            raise TimeoutError("Action time out")

        if signal_on:
            signal.signal(signal.SIGALRM, handler)
        initial = game_def.get_random_initial() if ran_init else game_def.initial
        state = StateExpanded.from_game_def(game_def,
                                            initial,
                                            strategy=players[0].strategy)
        match = Match([])
        time_step = 0
        continue_depth = depth is None or time_step < depth
        log.debug("\n--------------- Simulating match ----------------")
        log.debug("\na: {}\nb: {}\n".format(players[0].name, players[1].name))

        letters = ['a', 'b']
        response_times = {'a': [], 'b': []}
        while not state.is_terminal and continue_depth:
            turn = time_step % 2
            if signal_on:
                signal.alarm(time_out_sec)
            t0 = time.time()
            try:
                selected_action = players[turn].choose_action(state)
            except TimeoutError:
                log.info(
                    "Time out for player {}, choosing random action".format(
                        letters[turn]))
                index = randint(0, len(state.legal_actions) - 1)
                selected_action = state.legal_actions[index]
            finally:
                # Always disarm the alarm, also when the player fails with
                # another exception, so no stray SIGALRM fires afterwards
                if signal_on:
                    signal.alarm(0)
            t1 = time.time()
            response_times[letters[turn]].append(round((t1 - t0) * 1000, 3))
            match.add_step(Step(state, selected_action, time_step))
            time_step += 1
            continue_depth = depth is None or time_step < depth
            # The next state is built with the strategy of the player moving next
            state = state.get_next(selected_action,
                                   strategy_path=players[time_step %
                                                         2].strategy)
        # Close the match with the last state reached and no action
        match.add_step(Step(state, None, time_step))
        log.debug(match)
        return match, {
            k: round(sum(lst) / (len(lst) if len(lst) > 0 else 1), 3)
            for k, lst in response_times.items()
        }
Esempio n. 13
0
    def build(game_def, args):
        """
        Learns a strategy with ILASP and stores it in the strategies folder.

        The ILASP task file is assembled from the background file, the
        language bias and the examples. When no examples file is given, the
        examples are generated first through PrunedMinmaxPlayer.build.

        Args:
            game_def (GameDef): The game definition used for the creation
            args (NameSpace): A name space with all the attributes defined in add_parser_build_args
        Returns:
            dict: 'save_time' in milliseconds spent storing the strategy
        """
        # Switch off every other output of PrunedMinmaxPlayer.build
        args.rules_file_name = None
        args.tree_image_file_name = None
        args.train_file_name = None
        args.tree_name = None
        if args.ilasp_examples_file_name is None:
            log.debug("Generating examples using min_max_asp algorithm")
            args.ilasp_examples_file_name = 'temp_examples.las'
            PrunedMinmaxPlayer.build(game_def, args)

        base_path = './approaches/ilasp/{}/'.format(game_def.name)
        task_path = '{}temporal.las'.format(base_path)

        # Assemble the task: background + language bias + examples
        task_lines = []
        with open(args.background_path, 'r') as background_file:
            task_lines.extend(background_file.readlines())
        with open('{}languages/{}'.format(base_path, args.language_bias_name),
                  'r') as language_bias_file:
            language_bias_lines = language_bias_file.readlines()
        task_lines.extend(language_bias_lines)
        with open(
                '{}examples/{}'.format(base_path,
                                       args.ilasp_examples_file_name),
                'r') as examples_file:
            task_lines.extend(examples_file.readlines())
        with open(task_path, 'w') as complete_file:
            complete_file.write("".join(task_lines))

        if args.ilasp_arg is None:
            extra_flags = []
        else:
            extra_flags = ["--" + a for a in args.ilasp_arg]

        command = [
            "ILASP ", "--clingo5 ", "--version=2i", task_path, "--multi-wc ",
            "--simple", "--max-rule-length=6", "--max-wc-length=5", "-ml=5",
            "-q"
        ] + extra_flags

        string_command = " ".join(command)
        log.info("Running ilasp command: \n{}".format(string_command))
        raw_output = subprocess.check_output(string_command, shell=True)
        result = raw_output.decode("utf-8")
        log.debug("Found strategy: \n{}".format(result))
        started = time.time()

        strategy_file_path = '{}/strategies/{}'.format(base_path,
                                                       args.strategy_name)
        os.makedirs(os.path.dirname(strategy_file_path), exist_ok=True)

        # Language bias lines not starting with '#' are appended to the strategy
        bias_predicates = [
            line for line in language_bias_lines if line[0] != "#"
        ]
        result = result + "".join(bias_predicates)
        with open(strategy_file_path, 'w') as strategy_file:
            strategy_file.write(result)
        log.debug("Strategy saved in {}".format(strategy_file_path))

        elapsed_ms = round((time.time() - started) * 1000, 3)
        return {'save_time': elapsed_ms}
Esempio n. 14
0
    def generate_from(cls,game_def,net,state):
        """
        Generates a tree with the predictions of the network.
        Will generate as children all the legal actions and also the illegal actions
        with higher probabilities than the legal ones. Will only further 
        open legal actions

        The tree is built level by level starting from a root node that holds
        the given state. Every expanded node stores the value predicted by the
        network in its attribute v, and every child stores the probability of
        its action in the attribute p.

        Args:
            game_def (GameDef): Provides the encoder with the list of all
                                actions and the mask of legal actions
            net: Network queried with predict_pi_v for each expanded state
            state (State): State placed in the root of the tree

        Returns:
            tree (TreeNet): The generated tree
        """
        log.debug("Generating net tree...")
        root = TreeNet.node_class(Step(state,None,0),"a",dic={"is_legal":1,"p":1,"v":0})
        tree = TreeNet(root,game_def,net)
        # Hashes of the steps already in the tree, used to skip repeated steps
        set_visited = set()
        # Nodes of the level currently being expanded
        current_nodes = [root]
        # Level counter; incremented below but never read in this method
        it = 0
        while(len(current_nodes)>0):
            it+=1
            new_nodes = []
        
            for n in current_nodes:
                s = n.step.state
                if s.is_terminal:
                    #Dont expand terminal nodes
                    continue
                if not n.is_legal:
                    #Dont expand illegal moves
                    continue
                # NOTE: from here on the parameter `state` is rebound to the
                # state the children of this node start from
                if n.step.action is None:
                    #Case for root node
                    state = n.step.state
                else:
                    state = n.step.next_state()
                if state.is_terminal:
                    # The action of the node ends the game: store the
                    # predicted value and create no children
                    pi, v = net.predict_pi_v(state)
                    n.v=v
                    continue
                pi, v = net.predict_pi_v(state)
                n.v=v
                # Compared against 0 and 1 below, one entry per action index
                legal_actions_masked = game_def.encoder.mask_legal_actions(state)
                
                illegal_print_th = 0.001 #Only illegal states with more than this amout in diference will be printed
                general_print_th = 0.001 #Only states with at least this prob will be printed
                # Probability of the most probable legal action plus the margin
                # an illegal action must exceed to be included
                max_prob = pi[np.argmax(legal_actions_masked*pi)]+illegal_print_th

                
                for i,p in enumerate(pi):
                    if p<general_print_th:
                        continue
                
                    # Illegal actions are only kept when they beat the best legal one
                    if p<=max_prob and legal_actions_masked[i]==0:
                        continue
                    
                    action_str= str(game_def.encoder.all_actions[i])
                    if legal_actions_masked[i]==0:
                        # Illegal action: built from facts for the player in control
                        action = Action.from_facts("does({},{}).".format(state.control,action_str),game_def)
                    else:
                        action = state.get_legal_action_from_str(action_str) 
                    
                    step = Step(state,action,n.step.time_step+1)
                    step_hash = step.__hash__()
                    if step_hash in set_visited:
                        continue
                    node = TreeNet.node_class(step,"a",parent=n,dic={"is_legal":legal_actions_masked[i]==1,"p":p,"v":0})
                    new_nodes.append(node)
                    set_visited.add(step_hash)
            # Continue with the nodes created in this level
            current_nodes = new_nodes
        return tree
Esempio n. 15
0
    def build(game_def, args):
        """
        Trains a network through repeated rounds of episode generation,
        training and comparison against the best network found so far.

        In each of the args.n_train iterations the examples of args.n_episodes
        episodes run with the best network are collected, a copy of the best
        network is trained on them and both networks play against each other.
        The trained copy becomes the best network, and is saved under
        "<model_name>/<iteration>", only when it wins more matches. The best
        network is saved once more at the end. Nothing is returned.

        Args:
            game_def (GameDef): The game definition used for the creation
            args (NameSpace): A name space with all the attributes defined in add_parser_build_args
        """
        best_net = NetAlpha(game_def, args.model_name, model=None, args=args)
        best_net.load_model_from_args()
        # NOTE(review): the result of this call is discarded and the same call
        # is repeated below when a random seed is given — confirm it is needed
        game_def.get_random_initial()
        using_random = not args.train_rand is None
        if (using_random):
            log.info(
                "Using random seed {} for initial states in training".format(
                    args.train_rand))
            game_def.get_random_initial()
            initial_states = game_def.random_init
            # Shuffles game_def.random_init in place, seeded with train_rand
            random.Random(args.train_rand).shuffle(initial_states)
        else:
            log.info("Using default initial state in training {} ".format(
                game_def.initial))
            initial_states = [game_def.initial]

        # NOTE(review): number_initial is computed but not used in this method
        number_initial = len(
            initial_states) if args.n_vs > len(initial_states) else args.n_vs
        for i in range(args.n_train):
            log.info("------- Iteration {} --------".format(i))
            training_examples = []
            for e in range(args.n_episodes):
                log.debug("\t\tEpisode {}...".format(e))
                new_examples = TreeZero.run_episode(game_def, best_net)
                training_examples += new_examples
                # The initial state is assigned after the episode has run, so
                # it takes effect from the following episode on
                game_def.initial = initial_states[i % len(initial_states)]
            new_net = best_net.copy()

            #Training new net
            log.info("Training net with {} examples".format(
                len(training_examples)))
            new_net.train(training_examples)

            #Comparing nets
            log.info("Comparing networks...")
            p_old = AlphaZero(game_def, "training_old", "a", best_net)
            p_new = AlphaZero(game_def, "training_new", "a", new_net)
            benchmarks = Match.vs(game_def,
                                  args.n_vs, [[p_old, p_new], [p_new, p_old]],
                                  initial_states, ["old_net", "new_net"],
                                  penalize_illegal=args.penalize_illegal)
            log.info(benchmarks)
            # Entry "b" of the benchmarks is read as the new network and "a" as the old one
            new_wins = benchmarks["b"]["wins"]
            old_wins = benchmarks["a"]["wins"]
            log.info(
                "New: Wan {}  Lost Illegal {}\nOld network: Wan {} Lost Illegal {}"
                .format(new_wins, benchmarks["b"]["matches_lost_by_illegal"],
                        old_wins, benchmarks["a"]["matches_lost_by_illegal"]))

            #Updating best net
            if new_wins > old_wins:
                log.info(
                    "{}--------------- New network is better {}vs{}------------------{}"
                    .format(bcolors.FAIL, new_wins, old_wins, bcolors.ENDC))
                best_net = new_net
                best_net.save_model(
                    model_name="{}/{}".format(best_net.model_name, i))
                if args.vis_tree:
                    # Visualizing tree of best net
                    game_def.initial = initial_states[0]
                    state = game_def.get_initial_state()
                    p_new.visualize_net(
                        state,
                        "train-{}-iter-{}-new".format(new_net.model_name, i))

        log.info("Saving model")
        best_net.save_model()
Esempio n. 16
0
    def simulate(game_def,
                 players,
                 depth=None,
                 time_out_sec=None,
                 penalize_illegal=False):
        """
        Simulates a match between two players, alternating turns starting
        with players[0], until a terminal state or the depth limit is reached.

        A player that exceeds the time limit gets a random legal action
        instead. A player raising IllegalActionError loses the match: the
        state is marked terminal with goals -1 for that player and +1 for the
        opponent, and the event is stored in match.illegal_lost.

        Args:
            game_def (GameDef): The game definition to play
            players (Player,Player): A tuple of the players

            depth: Generate until depth or terminal state reached
            time_out_sec: The number of seconds the player will have to make a move
                          (None disables the time limit)
            penalize_illegal: True if a selection of an illegal action should be highly
                              penalized by the player

        Returns:
            (Match, dict): The match played, closed by a final step without
                           action, and the average response time in
                           milliseconds of each player ('a' and 'b')
        """
        # The alarm signal is only used when a time limit was given
        signal_on = not time_out_sec is None

        def handler(signum, frame):
            raise TimeoutError("Action time out")

        if signal_on: signal.signal(signal.SIGALRM, handler)
        initial = game_def.initial
        state = StateExpanded.from_game_def(game_def,
                                            initial,
                                            strategy=players[0].strategy)
        match = Match([])
        time_step = 0
        continue_depth = True if depth == None else time_step < depth
        log.debug("\n--------------- Simulating match ----------------")
        log.debug("\na: {}\nb: {}\n".format(players[0].name, players[1].name))

        letters = ['a', 'b']
        response_times = {'a': [], 'b': []}
        while (not state.is_terminal and continue_depth):
            current_control = letters[time_step % 2]
            # NOTE(review): the alarm is only cancelled after the try statement
            # below completes; an exception other than the two handled ones
            # would leave it armed — consider a finally clause
            if signal_on: signal.alarm(time_out_sec)
            t0 = time.time()
            try:
                selected_action = players[time_step % 2].choose_action(
                    state,
                    time_step=time_step,
                    penalize_illegal=penalize_illegal)
            except TimeoutError as ex:
                log.debug(
                    "Time out for player {}, choosing random action".format(
                        current_control))
                index = randint(0, len(state.legal_actions) - 1)
                selected_action = state.legal_actions[index]
            except IllegalActionError as ex:
                log.debug(
                    "Player {}, choosing illegal action {} in step {} -> Match lost"
                    .format(players[time_step % 2].name, str(ex.action),
                            time_step))
                # Force the end of the match on the current state, giving the
                # win to the opponent
                state.is_terminal = True
                state.goals = {
                    current_control: -1,
                    letters[(time_step + 1) % 2]: +1,
                }
                selected_action = None
                match.illegal_lost = {
                    "player": current_control,
                    "time_step": time_step
                }
            if signal_on: signal.alarm(0)
            t1 = time.time()
            # Response time of the move in milliseconds
            response_times[current_control].append(round((t1 - t0) * 1000, 3))
            step = Step(state, selected_action, time_step)
            match.add_step(step)
            time_step += 1
            continue_depth = True if depth == None else time_step < depth
            # Without an action (illegal move) the state is not advanced; the
            # loop then stops because the state was marked terminal
            if not selected_action is None:
                state = state.get_next(selected_action,
                                       strategy_path=players[time_step %
                                                             2].strategy)
        # Close the match with the last state reached and no action
        match.add_step(Step(state, None, time_step))
        log.debug(match)
        return match, {
            k: round(sum(lst) / (len(lst) if len(lst) > 0 else 1), 3)
            for k, lst in response_times.items()
        }