def run(self, target_points):
    """
    Play one game through ``self._run_game`` and record a summary of it.

    The match-up is logged before the game; afterwards a summary (timestamps,
    duration, players and final points) is logged and appended to
    ``results.log`` inside ``log_folder_name``. The game history is logged at
    debug level only.

    :param target_points: forwarded unchanged to ``self._run_game``.
    :return: None
    """
    logger.warning("Running {}".format(self.name))

    stamp_format = "%Y-%m-%d_%H-%M-%S"
    began_at = time()
    started_stamp = datetime.datetime.now().strftime(stamp_format)

    # The template indexes the agents positionally (0/2 versus 1/3).
    agent_infos = [agent.info for agent in self.agents]
    players_vs_string = (""" 0: {0} 2: {2} VS. 1: {1} 3: {3} """).format(*agent_infos)
    logger.info("Playing: " + players_vs_string)

    outcome = self._run_game(target_points=target_points)
    finished_stamp = datetime.datetime.now().strftime(stamp_format)

    # NOTE(review): log_folder_name is not assigned in this method; presumably
    # it is defined at module level -- confirm.
    summary_template = """ ################################## {me.name} ################################## Log-folder: {log_folder} Start-time: {start_time} End-time: {end_time} Duration: {duration} {players_vs_string} Final Points: {points} """
    results_string = summary_template.format(
        me=self,
        log_folder=log_folder_name,
        start_time=started_stamp,
        end_time=finished_stamp,
        duration=time_since(began_at),
        players_vs_string=players_vs_string,
        points=outcome.points,
    )
    game_history_string = str(outcome.history)

    logger.info(results_string)
    logger.debug(game_history_string)

    # Append, so results of earlier runs in the same folder are kept.
    with open(log_folder_name + "/results.log", "a") as results_file:
        results_file.write(results_string)

    logger.warning("Finished Running {}".format(self.name))
def search(self, root_state: TichuState, max_depth: int, cheat: bool = True) -> PlayerAction:
    """
    Pick the action with the highest minimax value from ``root_state``.

    Every (action, successor state) pair produced by
    ``self.action_state_transisions`` is scored with ``self.min_value`` and
    the best-scoring action is returned. If there is exactly one possible
    action it is returned right away without searching.

    :param root_state: state to search from.
    :param max_depth: stored in ``self.maxdepth``; must be smaller than 11.
    :param cheat: must be truthy, ``cheat=False`` is not implemented.
    :return: the chosen action.
    """
    assert cheat, "cheat=False is not implemented"
    assert max_depth < 11  # Laptop Memory can't handle a depth of 11

    began_at = time()
    self.maxdepth = max_depth

    # possible actions
    transitions = list(self.action_state_transisions(root_state))
    if len(transitions) == 1:
        logger.debug("result of minimax: only one action; --> action:{}".format(transitions[0][0]))
        return transitions[0][0]

    def pruning_order(transition):
        # Low combinations first, passing last (sorted for better pruning).
        candidate = transition[0]
        if isinstance(candidate, PassAction):
            return float("inf")
        return len(candidate.combination)

    ordered_transitions = sorted(transitions, key=pruning_order)

    # start minimax search: every root action gets a full alpha-beta window
    scored_actions = []
    for candidate, successor in ordered_transitions:
        score = self.min_value(state=successor.copy_discard_history(),
                               alpha=-float("inf"),
                               beta=float("inf"),
                               depth=0,
                               playerpos=root_state.player_pos)
        scored_actions.append((candidate, score))

    action, val = max(scored_actions, key=itemgetter(1))
    logger.debug("Minimax search val: {}, time: {}".format(val, time_since(began_at)))
    return action
except AttributeError: # exp is probably a MultipleExperiments instance expname = exp.name logger.warning("exp: {}; args: {}".format(expname, args)) # run several experiments in multiple processors pool_size = args.pool_size if nbr_exp_left > 1 and pool_size > 1: with Pool(processes=pool_size) as pool: logger.warning( "Running experiments in Pool (of size {})".format(pool_size)) # run all experiments in Pool multiple_results = list() for i in range(nbr_exp_left): multiple_results.append( pool.apply_async(exp().run, (), {'target_points': args.target_points})) # wait for processes to complete for res in multiple_results: res.get() nbr_exp_left -= 1 # run experiment in parent process while (nbr_exp_left > 0 or time() < min_t) and time() < max_t: logger.warning("Running a experiment in parent process... ") nbr_exp_left -= 1 exp().run(target_points=args.target_points) logger.info("Total Experiments runningtime: {}".format( time_since(start_t)))
logger.warning("Experiment summary: ")
# Prefer ``__name__``; anything lacking it (probably a MultipleExperiments
# instance) is expected to expose ``name`` instead.
expname = exp.__name__ if hasattr(exp, "__name__") else exp.name
logger.warning("exp: {}; args: {}".format(expname, args))

# Run several experiments in multiple processes when more than one
# experiment is left and the requested pool has more than one worker.
pool_size = args.pool_size
if nbr_exp_left > 1 and pool_size > 1:
    with Pool(processes=pool_size) as pool:
        logger.warning("Running experiments in Pool (of size {})".format(pool_size))

        # Submit every remaining experiment to the pool.
        multiple_results = [
            pool.apply_async(exp().run, (), {'target_points': args.target_points})
            for _ in range(nbr_exp_left)
        ]

        # Wait for each submitted experiment; count it as done once it returned.
        for res in multiple_results:
            res.get()
            nbr_exp_left -= 1

# Run experiments in the parent process: keep going while experiments are
# left or min_t has not been reached, but never past max_t.
while (nbr_exp_left > 0 or time() < min_t) and time() < max_t:
    logger.warning("Running a experiment in parent process... ")
    nbr_exp_left -= 1
    exp().run(target_points=args.target_points)

logger.info("Total Experiments runningtime: {}".format(time_since(start_t)))
def _start_round(self)->Tuple[Tuple[int, int], History]:
    """
    Plays one round: sets it up with ``self._setup_round`` and then applies one
    action per loop iteration through ``self.env.step`` until the environment
    reports ``done``.

    The kind of action for an iteration is decided by the type of the first
    entry of ``curr_state.possible_actions_list``: a tichu announcement, a
    wish, the end of a trick (including giving the dragon trick away), or
    otherwise a combination/pass chosen by the agent whose turn it is.

    :return: ``GameOutcome(points, history)`` where ``points`` holds the first
             two entries of the final reward and ``history`` is taken from the
             final state.
    """
    start_t = time()
    console_logger.info("[ROUND START] Start round...")

    curr_state, reward, done, info = self._setup_round()
    assert reward == (0, 0, 0, 0)
    assert done is False
    logger.debug("Set up state: {}".format(curr_state))
    console_logger.info("Player {} has the MAHJONG and can start.".format(curr_state.player_pos))

    while not done:
        loop_start_t = time()
        first_action = curr_state.possible_actions_list[0]
        chosen_action = None
        # Only the play/pass branch has an acting player. Resetting this every
        # iteration keeps the finish check below from hitting an unbound name
        # (first iteration) or a stale player from an earlier iteration.
        current_player = None

        # Note: for both tichu and wish action, state.player_pos is not the same as action.player_pos,
        # it is the pos of the next player to play a combination
        # TODO assert that all actions are of the same type
        # TODO find the appropriate action and call next_state with it?

        # TICHU
        if isinstance(first_action, TichuAction):
            announce = self._agents[first_action.player_pos].announce_tichu(state=curr_state,
                                                                            already_announced=curr_state.announced_tichu,
                                                                            player=first_action.player_pos)
            if announce:
                console_logger.info("[TICHU] announced by {}".format(first_action.player_pos))
            else:
                console_logger.debug("{} does not announce a tichu".format(first_action.player_pos))
            chosen_action = TichuAction(player_pos=first_action.player_pos, announce_tichu=announce)

        # WISH
        elif isinstance(first_action, WishAction):
            wish = self._agents[first_action.player_pos].make_wish(state=curr_state, player=first_action.player_pos)
            console_logger.info("[WISH] {} by {}".format(wish, first_action.player_pos))
            chosen_action = WishAction(player_pos=first_action.player_pos, wish=wish)

        # TRICK ENDS
        elif isinstance(first_action, WinTrickAction):
            console_logger.info("[WIN TRICK] by {}".format(first_action.player_pos))
            chosen_action = first_action
            # The winner's agent decides who receives the dragon trick.
            if isinstance(first_action, GiveDragonAwayAction):
                to_player = self._agents[first_action.player_pos].give_dragon_away(state=curr_state,
                                                                                   player=first_action.player_pos)
                console_logger.info("[DRAGON AWAY] {} gives the dragon trick to {}".format(first_action.player_pos, to_player))
                chosen_action = GiveDragonAwayAction(player_from=first_action.player_pos, player_to=to_player,
                                                     trick=first_action.trick)

        # PLAY COMBINATION OR PASS
        else:
            current_player = curr_state.player_pos
            console_logger.info("[NEXT TO PLAY] Player{}'s turn to play on: {}".format(current_player, curr_state.trick_on_table.last_combination))
            console_logger.debug("with handcards: {}".format(curr_state.handcards[current_player]))

            # the agent chooses an action
            chosen_action = self._agents[current_player].action(curr_state)
            if isinstance(chosen_action, PassAction):
                console_logger.info("[PASS] {}".format(current_player))
            else:
                console_logger.info("[PLAY] {} plays {}".format(current_player, chosen_action))

        console_logger.debug("[Time: {}]".format(time_since(since=loop_start_t)))

        # APPLY THE ACTION
        curr_state, reward, done, info = self.env.step(chosen_action)

        # A player can only run out of cards on an iteration where somebody
        # actually played, so the check is skipped for the other action kinds.
        if current_player is not None and len(curr_state.handcards[current_player]) == 0:
            console_logger.info("[FINISH] player {} just finished. -> new ranking: {}".format(current_player, curr_state.ranking))

        console_logger.debug("Trick on table is now: {}".format(curr_state.trick_on_table))
        console_logger.debug("Current Handcards: \n{}".format(curr_state.handcards))
        console_logger.info("Combination on table is {}: {}".format('now' if isinstance(chosen_action, PlayCombination) or curr_state.trick_on_table.is_empty() else 'still', curr_state.trick_on_table.last_combination))

    console_logger.debug("Final State: {}".format(curr_state))
    # Only the first two reward entries are reported as the round's points.
    points = (reward[0], reward[1])
    console_logger.warning("[ROUND END] Round ended: ranking: {}, outcome: {} [Time: {}]".format(curr_state.ranking, points, time_since(since=start_t)))
    return GameOutcome(points, curr_state.history)
def start_game(self, target_points=1000, nbr_errors_to_ignore=99)->Tuple[Tuple[int, int], List[History]]:
    """
    Starts the tichu game and plays rounds until at least one team has reached
    ``target_points``.

    A round that raises an exception is logged and skipped (its points and
    history are not recorded). Once more than ``nbr_errors_to_ignore`` rounds
    have failed, the exception is re-raised.

    :param target_points: the game ends as soon as one of the two accumulated
                          team scores is no longer below this value.
    :param nbr_errors_to_ignore: number of failed rounds that are tolerated
                                 before the error is propagated. Defaults to
                                 99, the value that used to be hard-coded.
    :return: ``GameOutcome(points, round_histories)``: the accumulated points
             of the two teams and the histories of all completed rounds.
    """
    with error_logged(logger):  # log all raised errors
        start_t = time()
        console_logger.info("Starting game... target: {}".format(target_points))

        round_histories = list()
        nbr_errors = 0
        points = (0, 0)

        # run rounds until there is a winner
        while points[0] < target_points and points[1] < target_points:
            try:
                round_points, round_history = self._start_round()
                round_histories.append(round_history)
                points = (round_points[0] + points[0], round_points[1] + points[1])
                console_logger.warning("=========================================")
                console_logger.warning("Intermediate Result: {}".format(points))
                console_logger.warning("=========================================")
            except Exception as err:
                # Log the first `nbr_errors_to_ignore` errors and continue with
                # the next round; any error beyond that budget is re-raised.
                nbr_errors += 1
                if nbr_errors > nbr_errors_to_ignore:
                    raise
                else:
                    logger.error("There was en error while running a round. Next {} errors will be ignored.".format(nbr_errors_to_ignore-nbr_errors))
                    logger.exception(err)

        console_logger.info("[GAME END] Game ended: {p} [Nbr_Errors: {nbr_errs}, Time: {time_passed}]".format(p=points, nbr_errs=nbr_errors, time_passed=time_since(since=start_t)))
        return GameOutcome(points, round_histories)
gym.undo_logger_setup()

# Run description: "<agent class name>_<environment name>".
agent_name = AGENT.__class__.__name__
description = '{agentinfo}_{envn}'.format(agentinfo=agent_name, envn=args.env)

# Folders
# parent_folder is the directory one level above the one containing this file
# (the last "/"-separated component of that directory is dropped).
this_dir = os.path.dirname(os.path.realpath(__file__))
parent_folder = '/'.join(this_dir.split("/")[:-1])
train_base_folder = '{parent_folder}/nn_training/{descr}_{t}_steps_{nbr}'.format(
    parent_folder=parent_folder,
    descr=description,
    t=start_ftime,
    nbr=NBR_STEPS,
)
log_folder_name = "{base}/my_logs".format(base=train_base_folder)

# Logging
if args.debug:
    logging_mode = logginginit.DebugMode
else:
    logging_mode = logginginit.TrainMode
logginginit.initialize_loggers(
    output_dir=log_folder_name,
    logging_mode=logging_mode,
    min_loglevel=logging.DEBUG,
)

# Training
print("Training Agent ({}) for {} steps ...".format(agent_name, NBR_STEPS))
AGENT.train(env=ENV, base_folder=train_base_folder, nbr_steps=NBR_STEPS)
print("Training time: {}".format(time_since(start_t)))