def decide_chow(self, player, new_tile, choices, neighbors, game):
    """Let the policy-gradient model decide whether to Chow ``new_tile``.

    The model may pick "no_action" or one of the "chow_<choice>" decisions
    built from ``choices``. Every non-tile decision is addressed as
    ``34 + decisions_.index(name)``.

    Args:
        player: the deciding player; its ``fixed_hand`` and ``hand`` are
            shown on the board and it is passed to the state encoder.
        new_tile: the tile that was just discarded.
        choices: the Chow variants currently available.
        neighbors: the other players, passed to the state encoder.
        game: the running game; supplies ``lang_code`` and receives the
            notification when a Chow is made.

    Returns:
        ``(False, None)`` when the Chow is declined, otherwise
        ``(True, choice)`` with the selected variant.
    """
    self.begin_decision()

    fixed_hand, hand = player.fixed_hand, player.hand
    if self.display_step:
        self.print_game_board(fixed_hand, hand, neighbors, game)
    self.print_msg("Someone just discarded a %s."%new_tile.symbol)

    pg_model = get_MJPGFitted(self.pg_model_path)
    state = utils.extended_dnn_encode_state(player, neighbors, cpk_tile = new_tile)

    # Declining is always allowed; each offered variant adds one more action.
    pass_action = 34 + decisions_.index("no_action")
    valid_actions = [pass_action]
    valid_actions.extend(34 + decisions_.index("chow_%d"%(option)) for option in choices)

    action_filter = np.zeros(n_decisions)
    action_filter[valid_actions] = 1

    # Close the transition left open by the previous decision, if any.
    if not self.skip_history and self.history_waiting:
        self.update_transition(state, REWARD_NON_TERMINAL, action_filter)

    action = None
    while True:
        # A previous pass through the loop produced an invalid action:
        # record it together with the invalid-decision reward.
        if action is not None and not self.skip_history:
            self.update_history(state, action, action_filter)
            self.update_transition(state, REWARD_INVALID_DECISION, action_filter)

        action, value = pg_model.choose_action(
            state, action_filter = action_filter, return_value = True, strict_filter = True)

        if action in valid_actions:
            break
        if not self.is_train:
            # Outside training, do not retry: pick any legal action.
            action = random.choice(valid_actions)
            break

    if not self.skip_history:
        self.update_history(state, action, action_filter)

    self.end_decision()

    if action == pass_action:
        self.print_msg("%s chooses not to Chow %s [%.2f]."%(self.player_name, new_tile.symbol, value))
        return False, None

    choice = int(decisions_[action - 34].split("_")[1])
    symbols = []
    chow_tiles_tgstrs = []
    for offset in range(choice - 1, choice + 2):
        member = new_tile.generate_neighbor_tile(offset)
        symbols.append(member.symbol)
        chow_tiles_tgstrs.append(member.get_display_name(game.lang_code, is_short = False))
    chow_tiles_str = "".join(symbols)

    self.print_msg("%s chooses to Chow %s [%.2f]."%(self.player_name, chow_tiles_str, value))
    if game.lang_code is not None:
        game.add_notification(
            get_text(game.lang_code, "NOTI_CHOOSE_CHOW")%(self.player_name, ",".join(chow_tiles_tgstrs)))

    return True, choice
def decide_drop_tile(self, player, new_tile, neighbors, game):
    """Let the policy-gradient model choose which tile to discard.

    The legal actions are the tile indices (via ``Tile.convert_tile_index``)
    of every tile in ``player.hand`` plus ``new_tile`` when one is given.

    Args:
        player: the deciding player; its ``fixed_hand`` and ``hand`` are
            shown on the board and it is passed to the state encoder.
        new_tile: the tile just obtained, or ``None``.
        neighbors: the other players, passed to the state encoder.
        game: the running game; supplies ``lang_code`` and receives the
            discard notification.

    Returns:
        The tile produced by ``Tile.convert_tile_index`` for the chosen
        action.
    """
    self.begin_decision()

    fixed_hand, hand = player.fixed_hand, player.hand
    state = utils.extended_dnn_encode_state(player, neighbors, new_tile=new_tile)

    if self.display_step:
        self.print_game_board(fixed_hand, hand, neighbors, game, new_tile)

    pg_model = get_MJPGFitted(self.pg_model_path)

    candidates = player.hand if new_tile is None else player.hand + [new_tile]
    valid_actions = [Tile.convert_tile_index(candidate) for candidate in candidates]

    action_filter = np.zeros(n_decisions)
    action_filter[valid_actions] = 1

    # Close the transition left open by the previous decision, if any.
    if not self.skip_history and self.history_waiting:
        self.update_transition(state, REWARD_NON_TERMINAL, action_filter)

    action = None
    while True:
        # A previous pass through the loop produced an invalid action:
        # record it together with the invalid-decision reward.
        if action is not None and not self.skip_history:
            self.update_history(state, action, action_filter)
            self.update_transition(state, REWARD_INVALID_DECISION, action_filter)

        action, value = pg_model.choose_action(
            state, action_filter=action_filter, return_value=True, strict_filter=True)

        if action in valid_actions:
            break
        if not self.is_train:
            # Outside training, do not retry: pick any legal action.
            action = random.choice(valid_actions)
            break

    if not self.skip_history:
        self.update_history(state, action, action_filter)

    drop_tile = Tile.convert_tile_index(action)
    summary = "%s [%s] chooses to drop %s. [%.2f]" % (
        self.player_name, display_name, drop_tile.symbol, value)
    self.print_msg(summary)
    self.end_decision(True)

    if game.lang_code is not None:
        tile_label = drop_tile.get_display_name(game.lang_code, is_short=False)
        game.add_notification(
            get_text(game.lang_code, "NOTI_CHOOSE_DISCARD") % (self.player_name, tile_label))

    return drop_tile
def decide_kong(self, player, new_tile, kong_tile, location, src, neighbors, game):
    """Let the policy-gradient model decide whether to form a Kong.

    The model chooses between the "kong" and "no_action" decisions, each
    addressed as ``34 + decisions_.index(name)``.

    Args:
        player: the deciding player; its ``fixed_hand`` and ``hand`` are
            shown on the board and it is passed to the state encoder.
        new_tile: forwarded to ``print_game_board`` only.
        kong_tile: the tile the Kong would be made of.
        location: accepted for interface compatibility; the decision does
            not depend on it.
        src: "steal", "draw" or "existing"; selects the informational
            message printed before deciding.
        neighbors: the other players, passed to the state encoder.
        game: the running game; supplies ``lang_code`` and receives the
            notification when a Kong is formed.

    Returns:
        ``True`` when the Kong is formed, ``False`` when it is declined.
    """
    self.begin_decision()

    fixed_hand, hand = player.fixed_hand, player.hand
    if self.display_step:
        self.print_game_board(fixed_hand, hand, neighbors, game, new_tile)

    if src == "steal":
        self.print_msg("Someone just discarded a %s."%kong_tile.symbol)
    elif src == "draw":
        self.print_msg("You just drew a %s"%kong_tile.symbol)
    elif src == "existing":
        self.print_msg("You have 4 %s in hand"%kong_tile.symbol)

    pg_model = get_MJPGFitted(self.pg_model_path)
    state = utils.extended_dnn_encode_state(player, neighbors, cpk_tile = kong_tile)

    # Offer the Kong decision itself next to declining. The previous code
    # offered "pong" here, so the model was never asked about a Kong.
    # NOTE(review): assumes decisions_ has a "kong" entry - confirm against
    # the module-level table.
    kong_action = 34 + decisions_.index("kong")
    no_action = 34 + decisions_.index("no_action")
    valid_actions = [kong_action, no_action]

    action_filter = np.zeros(n_decisions)
    action_filter[valid_actions] = 1

    # Close the transition left open by the previous decision, if any.
    if not self.skip_history and self.history_waiting:
        self.update_transition(state, REWARD_NON_TERMINAL, action_filter)

    action = None
    while True:
        # A previous pass through the loop produced an invalid action:
        # record it together with the invalid-decision reward.
        if action is not None and not self.skip_history:
            self.update_history(state, action, action_filter)
            self.update_transition(state, REWARD_INVALID_DECISION, action_filter)

        action, value = pg_model.choose_action(
            state, action_filter = action_filter, return_value = True, strict_filter = True)

        if action in valid_actions:
            break
        elif not self.is_train:
            # Outside training, do not retry: pick any legal action.
            action = random.choice(valid_actions)
            break

    if not self.skip_history:
        self.update_history(state, action, action_filter)

    self.end_decision()

    kong_symbols = (kong_tile.symbol,) * 4

    # Declining must report "not" and return False; these branches used to
    # be swapped, so "no_action" formed the Kong and vice versa.
    if action == no_action:
        self.print_msg("%s [%s] chooses not to form a Kong %s%s%s%s [%.2f]."%(
            (self.player_name, display_name) + kong_symbols + (value,)))
        return False

    self.print_msg("%s [%s] chooses to form a Kong %s%s%s%s [%.2f]."%(
        (self.player_name, display_name) + kong_symbols + (value,)))
    if game.lang_code is not None:
        game.add_notification(
            get_text(game.lang_code, "NOTI_CHOOSE_KONG")%(
                self.player_name, kong_tile.get_display_name(game.lang_code, is_short = False)))
    return True
def update_transition(self, state_, reward = 0, action_filter_ = None):
    """Store the pending history entry in the model together with ``reward``.

    Does nothing unless ``self.is_train`` is set. The stored values come
    from ``self.pg_model_history``; ``state_`` and ``action_filter_`` are
    accepted but not read here.

    Args:
        state_: not used by this method.
        reward: reward stored with the pending entry.
        action_filter_: not used by this method.

    Raises:
        Exception: when training and ``self.history_waiting`` is false,
            i.e. there is no pending entry to complete.
    """
    if self.is_train:
        if not self.history_waiting:
            raise Exception("the network is NOT waiting for a transition")

        # The pending entry is consumed by this call.
        self.history_waiting = False

        pg_model = get_MJPGFitted(self.pg_model_path)
        pending = self.pg_model_history
        pg_model.store_transition(
            pending["state"],
            pending["action"],
            reward,
            pending["action_filter"],
            pending["heuristics_action"],
        )
def test(args):
    """Train, evaluate or interactively play against the fitted PG model.

    ``args`` is handed to ``parse_args``; the resulting ``action`` must be
    "train", "test" or "play" for any mode-specific setup to happen.

    One opponent is created per entry of ``trainer_conf`` and the PG player
    is appended last. ``n_episodes`` games are then played, and win/lose
    flags are written into the module-level ``game_record`` ring buffer. A
    summary line is printed every ``game_record_size`` episodes. When
    training with a ``save_name``, the model is saved once after the loop.

    Raises:
        Exception: when the action is "test" or "play" and no ``model_dir``
            was given.
    """
    global game_record_count

    args = parse_args(args)
    mode = args.action
    training = mode == "train"

    if training:
        if args.save_name is None:
            answer = input("You have not entered save_name, are you sure? [y/n] ").lower()
            if answer != "y":
                exit(-1)

        if args.model_dir is None:
            args.model_dir = pg_model_path
        else:
            for tag in ("pgf", "pgfr"):
                trainer_models[tag]["parameters"]["pg_model_path"] = args.model_dir

        # Only referenced by the disabled periodic checkpointing below.
        freq_model_save = args.n_episodes // 10
    elif mode in ("test", "play"):
        if args.model_dir is None:
            raise Exception("model_dir must be given to test/play")

    model = get_MJPGFitted(args.model_dir, **pg_model_paras)

    # Opponents first, one per configured tag; the PG player takes the next name.
    players = []
    for seat, model_tag in enumerate(trainer_conf):
        if mode == "play":
            opponent = Player.Player(MoveGenerator.Human, player_name=names[seat])
        else:
            spec = trainer_models[model_tag]
            opponent = Player.Player(spec["class"], player_name=names[seat], **spec["parameters"])
        players.append(opponent)

    if model_flag == "pgf":
        generator_class = MoveGenerator.PGFGenerator
    else:
        generator_class = MoveGenerator.PGFRGenerator
    pg_player = Player.Player(
        generator_class,
        player_name=names[len(players)],
        pg_model_path=args.model_dir,
        skip_history=False,
        is_train=training,
        display_step=mode == "play",
    )
    players.append(pg_player)

    if mode != "play":
        signal.signal(signal.SIGINT, signal_handler)

    game, shuffled_players, last_saved = None, None, -1
    for episode in range(args.n_episodes):
        if EXIT_FLAG:
            break

        if episode % freq_shuffle_players == 0:
            shuffled_players = random.sample(players, k=4)

        game = Game.Game(shuffled_players)
        winner, losers, _penalty = game.start_game()

        report_due = (episode + 1) % game_record_size == 0
        if training:
            model.learn(display_cost=report_due)

        # Reuse the oldest slot of the ring buffer for this game.
        index = game_record_count % game_record_size
        game_record[index, :, :] = np.zeros((4, 2))
        game_record_count += 1

        if winner is not None:
            game_record[index, players.index(winner), 0] = 1
            for loser in losers:
                game_record[index, players.index(loser), 1] = 1

        if report_due:
            # Win rate / lose rate (in percent) for each of the four players.
            rates = [
                game_record[:, seat, column].mean() * 100
                for seat in range(4)
                for column in range(2)
            ]
            print(
                "#%5d: %.2f%%/%.2f%%\t%.2f%%/%.2f%%\t%.2f%%/%.2f%%\t%.2f%%/%.2f%%"
                % (episode + 1, *rates))

        # Periodic checkpointing is currently disabled:
        # if args.action == "train" and args.save_name is not None and (i+1) % freq_model_save == 0:
        #     last_saved = i
        #     path = args.save_name.rstrip("/") + "_%d"%(i + 1)
        #     utils.makesure_dir_exists(path)
        #     model.save(path)

    if training and args.save_name is not None and last_saved < args.n_episodes - 1:
        path = args.save_name.rstrip("/") + "_%d" % args.n_episodes
        utils.makesure_dir_exists(path)
        model.save(path)