Example #1
    def run(self, target_points):
        logger.warning("Running {}".format(self.name))
        start_t = time()
        start_ftime = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

        players_vs_string = ("""
0: {0}
2: {2}
        VS.
1: {1}
3: {3}
""").format(*[p.info for p in self.agents])

        logger.info("Playing: " + players_vs_string)

        game_res = self._run_game(target_points=target_points)

        end_ftime = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

        results_string = ("""
################################## {me.name} ##################################
Log-folder: {log_folder}
Start-time: {start_time}
End-time: {end_time}
Duration: {duration}

{players_vs_string}

Final Points: {points}

""").format(me=self,
            log_folder=log_folder_name,
            start_time=start_ftime,
            end_time=end_ftime,
            duration=time_since(start_t),
            players_vs_string=players_vs_string,
            points=game_res.points)

        game_history_string = str(game_res.history)

        logger.info(results_string)
        logger.debug(game_history_string)

        with open(log_folder_name + "/results.log", "a") as f:
            f.write(results_string)

        logger.warning("Finished Running {}".format(self.name))
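
The examples on this page pair start_t = time() with a time_since(...) call to report elapsed time. The helper itself is not part of the snippets; a minimal sketch, assuming it simply returns a human-readable duration string, could look like this:

import datetime
from time import time

def time_since(since: float) -> str:
    """Return the time elapsed since `since` (a time.time() timestamp) as H:MM:SS."""
    return str(datetime.timedelta(seconds=round(time() - since)))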
Example #2
    def search(self,
               root_state: TichuState,
               max_depth: int,
               cheat: bool = True) -> PlayerAction:
        assert cheat, "cheat=False is not implemented"
        assert max_depth < 11  # Laptop Memory can't handle a depth of 11

        start_t = time()
        self.maxdepth = max_depth
        # possible actions
        asts = list(self.action_state_transisions(root_state))
        if len(asts) == 1:
            logger.debug(
                "result of minimax: only one action; --> action:{}".format(
                    asts[0][0]))
            return asts[0][0]

        # sort actions for better pruning: low combinations first, passing last
        asts_sorted = sorted(
            asts,
            key=lambda a_s: (float("inf") if isinstance(a_s[0], PassAction)
                             else len(a_s[0].combination)))

        # start minimax search
        res = [(a,
                self.min_value(state=s.copy_discard_history(),
                               alpha=-float("inf"),
                               beta=float("inf"),
                               depth=0,
                               playerpos=root_state.player_pos))
               for a, s in asts_sorted]
        action, val = max(res, key=itemgetter(1))
        logger.debug("Minimax search val: {}, time: {}".format(
            val, time_since(start_t)))
        return action
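
The sorting step above is a move-ordering heuristic for alpha-beta pruning: actions with small combinations are searched first and passing is searched last. A tiny self-contained illustration of the same key function, using hypothetical placeholder pairs instead of the project's action classes:

# hypothetical (action, size) pairs; None marks a "pass"
candidates = [("pass", None), ("pair", 2), ("single", 1), ("bomb", 4)]
ordered = sorted(candidates, key=lambda a: float("inf") if a[1] is None else a[1])
print(ordered)  # [('single', 1), ('pair', 2), ('bomb', 4), ('pass', None)]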
Example #3
    try:
        expname = exp.__name__
    except AttributeError:
        # exp is probably a MultipleExperiments instance
        expname = exp.name
    logger.warning("exp: {}; args: {}".format(expname, args))

    # run several experiments in multiple processors
    pool_size = args.pool_size
    if nbr_exp_left > 1 and pool_size > 1:
        with Pool(processes=pool_size) as pool:
            logger.warning(
                "Running experiments in Pool (of size {})".format(pool_size))
            # run all experiments in Pool
            multiple_results = list()
            for i in range(nbr_exp_left):
                multiple_results.append(
                    pool.apply_async(exp().run, (),
                                     {'target_points': args.target_points}))
            # wait for processes to complete
            for res in multiple_results:
                res.get()
                nbr_exp_left -= 1

    # run experiment in parent process
    while (nbr_exp_left > 0 or time() < min_t) and time() < max_t:
        logger.warning("Running an experiment in parent process...")
        nbr_exp_left -= 1
        exp().run(target_points=args.target_points)

    logger.info("Total experiments running time: {}".format(
        time_since(start_t)))
Example #4
    logger.warning("Experiment summary: ")
    try:
        expname = exp.__name__
    except AttributeError:
        # exp is probably a MultipleExperiments instance
        expname = exp.name
    logger.warning("exp: {}; args: {}".format(expname, args))

    # run several experiments in multiple processors
    pool_size = args.pool_size
    if nbr_exp_left > 1 and pool_size > 1:
        with Pool(processes=pool_size) as pool:
            logger.warning("Running experiments in Pool (of size {})".format(pool_size))
            # run all experiments in Pool
            multiple_results = list()
            for i in range(nbr_exp_left):
                multiple_results.append(pool.apply_async(exp().run, (), {'target_points': args.target_points}))
            # wait for processes to complete
            for res in multiple_results:
                res.get()
                nbr_exp_left -= 1

    # run experiment in parent process
    while (nbr_exp_left > 0 or time() < min_t) and time() < max_t:
        logger.warning("Running an experiment in parent process...")
        nbr_exp_left -= 1
        exp().run(target_points=args.target_points)

    logger.info("Total experiments running time: {}".format(time_since(start_t)))
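
The Pool pattern in Examples #3 and #4 (submit every run with apply_async, then block on each AsyncResult with get) reduces to a short self-contained sketch; run_experiment here is a hypothetical placeholder for exp().run:

from multiprocessing import Pool

def run_experiment(target_points):
    # placeholder for exp().run(target_points=...)
    return target_points

if __name__ == "__main__":
    with Pool(processes=4) as pool:
        # submit all jobs without blocking
        async_results = [pool.apply_async(run_experiment, (), {"target_points": 1000})
                         for _ in range(4)]
        # get() blocks until the job finishes and re-raises any worker exception
        results = [res.get() for res in async_results]
    print(results)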

Example #5
    def _start_round(self) -> Tuple[Tuple[int, int], History]:
        start_t = time()
        console_logger.info("[ROUND START] Start round...")

        curr_state, reward, done, info = self._setup_round()
        assert reward == (0, 0, 0, 0)
        assert done is False
        logger.debug("Set up state: {}".format(curr_state))

        console_logger.info("Player {} has the MAHJONG and can start.".format(curr_state.player_pos))

        while not done:
            loop_start_t = time()

            first_action = curr_state.possible_actions_list[0]
            chosen_action = None
            # Note: for both tichu and wish action, state.player_pos is not the same as action.player_pos, it is the pos of the next player to play a combination
            # TODO assert that all actions are of the same type
            # TODO find the appropriate action and call next_state with it?

            # TICHU
            if isinstance(first_action, TichuAction):
                announce = self._agents[first_action.player_pos].announce_tichu(state=curr_state, already_announced=curr_state.announced_tichu, player=first_action.player_pos)
                if announce:
                    console_logger.info("[TICHU] announced by {}".format(first_action.player_pos))
                else:
                    console_logger.debug("{} does not announce a tichu".format(first_action.player_pos))
                chosen_action = TichuAction(player_pos=first_action.player_pos, announce_tichu=announce)

            # WISH
            elif isinstance(first_action, WishAction):
                wish = self._agents[first_action.player_pos].make_wish(state=curr_state, player=first_action.player_pos)
                console_logger.info("[WISH] {} by {}".format(wish, first_action.player_pos))
                chosen_action = WishAction(player_pos=first_action.player_pos, wish=wish)

            # TRICK ENDS
            elif isinstance(first_action, WinTrickAction):
                console_logger.info("[WIN TRICK] by {}".format(first_action.player_pos))
                chosen_action = first_action
                if isinstance(first_action, GiveDragonAwayAction):
                    to_player = self._agents[first_action.player_pos].give_dragon_away(state=curr_state, player=first_action.player_pos)
                    console_logger.info("[DRAGON AWAY] {} gives the dragon trick to {}".format(first_action.player_pos, to_player))
                    chosen_action = GiveDragonAwayAction(player_from=first_action.player_pos, player_to=to_player, trick=first_action.trick)

            # PLAY COMBINATION OR PASS
            else:
                current_player = curr_state.player_pos
                console_logger.info("[NEXT TO PLAY] Player{}'s turn to play on: {}".format(current_player, curr_state.trick_on_table.last_combination))
                console_logger.debug("with handcards: {}".format(curr_state.handcards[current_player]))

                # the agent chooses an action
                chosen_action = self._agents[current_player].action(curr_state)

                if isinstance(chosen_action, PassAction):
                    console_logger.info("[PASS] {}".format(current_player))
                else:
                    console_logger.info("[PLAY] {} plays {}".format(current_player, chosen_action))

                console_logger.debug("[Time: {}]".format(time_since(since=loop_start_t)))

            # APPLY THE ACTION
            curr_state, reward, done, info = self.env.step(chosen_action)
            # a player can only finish by playing a combination (this also keeps current_player defined)
            if isinstance(chosen_action, PlayCombination) and len(curr_state.handcards[current_player]) == 0:
                console_logger.info("[FINISH] player {} just finished. -> new ranking: {}".format(current_player, curr_state.ranking))

            console_logger.debug("Trick on table is now: {}".format(curr_state.trick_on_table))
            console_logger.debug("Current Handcards: \n{}".format(curr_state.handcards))

            on_table_word = 'now' if isinstance(chosen_action, PlayCombination) or curr_state.trick_on_table.is_empty() else 'still'
            console_logger.info("Combination on table is {}: {}".format(on_table_word, curr_state.trick_on_table.last_combination))

        console_logger.debug("Final State: {}".format(curr_state))
        points = (reward[0], reward[1])
        console_logger.warning("[ROUND END] Round ended: ranking: {}, outcome: {} [Time: {}]".format(curr_state.ranking, points, time_since(since=start_t)))
        return GameOutcome(points, curr_state.history)
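
Both _start_round above and start_game in Example #6 return a GameOutcome that is unpacked like a (points, history) tuple. The class itself does not appear in these snippets; a minimal sketch, assuming it is a NamedTuple compatible with the return annotations used here:

from typing import Any, NamedTuple, Tuple

class GameOutcome(NamedTuple):
    points: Tuple[int, int]  # the two team scores
    history: Any             # a single round History here, a List[History] in start_game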
Example #6
    def start_game(self, target_points=1000) -> Tuple[Tuple[int, int], List[History]]:
        """
        Starts the Tichu game and plays rounds until one team reaches target_points.
        Returns a GameOutcome containing the points the two teams made and the round histories.
        """
        with error_logged(logger):  # log all raised errors
            start_t = time()
            console_logger.info("Starting game... target: {}".format(target_points))

            round_histories = list()
            nbr_errors = 0
            nbr_errors_to_ignore = 99

            points = (0, 0)

            while points[0] < target_points and points[1] < target_points:
                # run rounds until there is a winner
                try:
                    round_points, round_history = self._start_round()
                    round_histories.append(round_history)
                    points = (round_points[0] + points[0], round_points[1] + points[1])
                    console_logger.warning("=========================================")
                    console_logger.warning("Intermediate Result: {}".format(points))
                    console_logger.warning("=========================================")
                except Exception as err:
                    # log the error but continue with the next round; re-raise after nbr_errors_to_ignore errors
                    nbr_errors += 1
                    if nbr_errors > nbr_errors_to_ignore:
                        raise
                    else:
                        logger.error("There was an error while running a round. Next {} errors will be ignored.".format(nbr_errors_to_ignore - nbr_errors))
                        logger.exception(err)

            console_logger.info("[GAME END] Game ended: {p} [Nbr_Errors: {nbr_errs}, Time: {time_passed}]".format(p=points, nbr_errs=nbr_errors, time_passed=time_since(since=start_t)))

        return GameOutcome(points, round_histories)
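
start_game wraps the whole game in an error_logged(logger) context manager so that raised errors end up in the log. That helper is also not part of these snippets; a minimal sketch of a compatible context manager, assuming it just logs any exception before re-raising, could be:

from contextlib import contextmanager

@contextmanager
def error_logged(log):
    """Log any exception raised inside the with-block to `log`, then re-raise it."""
    try:
        yield
    except Exception:
        log.exception("Unhandled error")
        raise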
Example #7
    gym.undo_logger_setup()

    description = '{agentinfo}_{envn}'.format(
        agentinfo=AGENT.__class__.__name__, envn=args.env)

    # Folders
    parent_folder = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    train_base_folder = '{parent_folder}/nn_training/{descr}_{t}_steps_{nbr}'.format(
        parent_folder=parent_folder,
        t=start_ftime,
        nbr=NBR_STEPS,
        descr=description)

    log_folder_name = "{base}/my_logs".format(base=train_base_folder)

    # Logging
    logging_mode = logginginit.DebugMode if args.debug else logginginit.TrainMode
    logginginit.initialize_loggers(output_dir=log_folder_name,
                                   logging_mode=logging_mode,
                                   min_loglevel=logging.DEBUG)

    # Training
    print("Training Agent ({}) for {} steps ...".format(
        AGENT.__class__.__name__, NBR_STEPS))

    AGENT.train(env=ENV, base_folder=train_base_folder, nbr_steps=NBR_STEPS)

    print("Training time: {}".format(time_since(start_t)))
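
Example #7 references start_t and start_ftime without defining them; following Example #1, they are presumably created at the top of the script like this:

import datetime
from time import time

start_t = time()  # reference point for time_since(start_t)
start_ftime = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")  # timestamp used in folder names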