Example #1
0
    def decide_pong(self, player, new_tile, neighbors, game):
        """Decide whether to claim a discarded tile to form a Pong.

        The Q-network chooses between the suit-specific ``<suit>_pong`` action
        and ``no_action``.  While training, an invalid choice is recorded,
        penalised with ``REWARD_INVALID_DECISION`` and the network is asked
        again; outside training an invalid choice is replaced by a random
        valid action.

        Args:
            player: the deciding player; its ``fixed_hand`` and ``hand`` are read.
            new_tile: the tile that was just discarded.
            neighbors: passed through to the state encoder and board printer.
            game: the running game; receives a notification when its
                ``lang_code`` is set and a Pong is formed.

        Returns:
            True when the pong action was chosen, False otherwise.
        """
        self.begin_decision()

        fixed_hand, hand = player.fixed_hand, player.hand

        if self.display_step:
            self.print_game_board(fixed_hand, hand, neighbors, game, new_tile)

        self.print_msg("Someone just discarded a %s." % new_tile.symbol)

        q_network = get_Network(self.q_network_path)
        state = utils.dnn_encode_state(player, neighbors)

        # Close the transition left open by the previous decision, using the
        # current state as its successor.
        if not self.skip_history and self.history_waiting:
            self.update_transition(state, REWARD_NON_TERMINAL)

        # NOTE(review): the 34 offset presumably skips the tile-discard action
        # slots used by decide_drop_tile -- confirm against n_decisions.
        pong_action = 34 + decisions_.index("%s_pong" % new_tile.suit)
        no_action = 34 + decisions_.index("no_action")
        valid_actions = [pong_action, no_action]
        action_filter = np.zeros(n_decisions)
        action_filter[valid_actions] = 1
        action = None

        while True:
            # A non-None action here means the previous pick was invalid:
            # store it together with the invalid-decision reward.
            if action is not None and not self.skip_history:
                self.update_history(state, action, action_filter)
                self.update_transition(state, REWARD_INVALID_DECISION)

            action, value = q_network.choose_action(state, action_filter=action_filter, eps_greedy=self.is_train,
                                                    return_value=True, strict_filter=not self.is_train)

            if action in valid_actions:
                break
            elif not self.is_train:
                action = random.choice(valid_actions)
                break

        if not self.skip_history:
            self.update_history(state, action, action_filter)

        self.end_decision()

        # The pong action (not "no_action") is the one that forms the Pong.
        if action == pong_action:
            self.print_msg("%s [%s] chooses to form a Pong %s%s%s. [%.2f]" % (
                self.player_name, display_name, new_tile.symbol, new_tile.symbol, new_tile.symbol, value))
            if game.lang_code is not None:
                game.add_notification(get_text(game.lang_code, "NOTI_CHOOSE_PONG") % (
                    self.player_name, new_tile.get_display_name(game.lang_code, is_short=False)))
            return True

        self.print_msg("%s [%s] chooses not to form a Pong %s%s%s. [%.2f]" % (
            self.player_name, display_name, new_tile.symbol, new_tile.symbol, new_tile.symbol, value))
        return False
Example #2
0
    def update_transition(self, state_, reward=0, action_filter_=None):
        """Store the pending history entry as a completed transition.

        Does nothing unless ``self.is_train`` is set.  The pending entry in
        ``self.q_network_history`` supplies the state, action and action
        filter; ``state_`` is the successor state.  Passing the string
        ``"terminal"`` reuses the pending state as its own successor.

        Args:
            state_: the successor state, or the literal string ``"terminal"``.
            reward: the reward stored with the transition.
            action_filter_: accepted but not used by this method.

        Raises:
            Exception: if no history entry is waiting for a transition.
        """
        if not self.is_train:
            return
        if not self.history_waiting:
            raise Exception("the network is NOT waiting for a transition")

        # Only an exact str equal to "terminal" triggers the substitution.
        if type(state_) is str and state_ == "terminal":
            next_state = self.q_network_history["state"]
        else:
            next_state = state_

        self.history_waiting = False
        store = get_Network(self.q_network_path).store_transition
        pending = self.q_network_history
        store(pending["state"], pending["action"], reward, next_state, pending["action_filter"])
Example #3
0
def main():
    """Run the deep-Q training loop and save the resulting model.

    Builds one player per tag in ``trainer_conf`` plus one training
    ``Generator`` player, then plays up to ``n_epochs`` games.  After each
    game the network learns once and the win/lose outcome is written into the
    rolling ``game_record`` buffer (column 0 = win, column 1 = lose, indexed
    by the player's position in ``players``).  Statistics are printed every
    ``game_record_size`` games.  The model is saved once after the loop.
    """
    global game_record_count

    trainer_models["deepq"]["parameters"]["q_network_path"] = deep_q_model_dir
    model = get_Network(deep_q_model_dir, **deep_q_model_paras)

    # One opponent per configured trainer tag, named in order from `names`.
    players = []
    for seat, model_tag in enumerate(trainer_conf):
        spec = trainer_models[model_tag]
        players.append(Player.Player(spec["class"], player_name=names[seat], **spec["parameters"]))

    # The learning player takes the next unused name.
    deepq_player = Player.Player(Generator, player_name=names[len(players)], q_network_path=deep_q_model_dir,
                                 skip_history=False, is_train=True,
                                 display_step=False)
    players.append(deepq_player)

    # NOTE(review): signal_handler presumably sets EXIT_FLAG so the loop can
    # stop cleanly on Ctrl-C -- confirm against its definition.
    signal.signal(signal.SIGINT, signal_handler)

    game, shuffled_players, last_saved = None, None, -1
    for epoch in range(n_epochs):
        if EXIT_FLAG:
            break

        # Re-seat the players (and start a fresh Game) periodically.
        if epoch % freq_shuffle_players == 0:
            shuffled_players = random.sample(players, k=4)
            game = Game.Game(shuffled_players)

        report_due = (epoch + 1) % game_record_size == 0

        winner, losers, _ = game.start_game()
        model.learn(display_cost=report_due)

        # Overwrite the oldest slot of the rolling record.
        index = game_record_count % game_record_size
        game_record[index, :, :] = np.zeros((4, 2))
        game_record_count += 1

        if winner is not None:
            game_record[index, players.index(winner), 0] = 1
            for loser in losers:
                game_record[index, players.index(loser), 1] = 1

        if report_due:
            # win%/lose% for each of the four players, in `players` order
            rates = [game_record[:, seat, outcome].mean() * 100
                     for seat in range(4) for outcome in range(2)]
            print("#%5d: %.2f%%/%.2f%%\t%.2f%%/%.2f%%\t%.2f%%/%.2f%%\t%.2f%%/%.2f%%" % tuple(
                [epoch + 1] + rates))

    # Final checkpoint, written once after training ends or is interrupted
    # (previously this ran after every single game and rewrote the same
    # path).  `last_saved` is never advanced in this function, so the guard
    # only skips the save when n_epochs is 0.
    if last_saved < n_epochs - 1:
        path = save_name.rstrip("/") + "_%d" % n_epochs
        utils.makesure_dir_exists(path)
        model.save(path)
Example #4
0
    def decide_drop_tile(self, player, new_tile, neighbors, game):
        """Choose which tile to discard and return it.

        The valid actions are the indices of every tile in ``player.hand``
        plus ``new_tile`` when one is given.  While training, an invalid
        choice is recorded, penalised with ``REWARD_INVALID_DECISION`` and the
        network is asked again; outside training an invalid choice is
        replaced by a random valid action.

        Args:
            player: the deciding player; its ``fixed_hand`` and ``hand`` are read.
            new_tile: an extra tile that may also be discarded, or None.
            neighbors: passed through to the state encoder and board printer.
            game: the running game; receives a notification when its
                ``lang_code`` is set.

        Returns:
            The tile obtained by converting the chosen action index back
            through ``Tile.convert_tile_index``.
        """
        self.begin_decision()

        fixed_hand, hand = player.fixed_hand, player.hand
        state = utils.dnn_encode_state(player, neighbors)

        # Close the transition left open by the previous decision.
        if not self.skip_history and self.history_waiting:
            self.update_transition(state, REWARD_NON_TERMINAL)

        if self.display_step:
            self.print_game_board(fixed_hand, hand, neighbors, game, new_tile)

        q_network = get_Network(self.q_network_path)

        if new_tile is None:
            candidates = player.hand
        else:
            candidates = player.hand + [new_tile]
        valid_actions = [Tile.convert_tile_index(candidate) for candidate in candidates]

        action_filter = np.zeros(n_decisions)
        action_filter[valid_actions] = 1

        def ask_network():
            # Explore only while training; filter strictly otherwise.
            return q_network.choose_action(state, action_filter=action_filter, eps_greedy=self.is_train,
                                           return_value=True, strict_filter=not self.is_train)

        action, value = ask_network()
        while action not in valid_actions:
            if not self.is_train:
                action = random.choice(valid_actions)
                break
            # Training: record the invalid pick with its penalty, then retry.
            if action is not None and not self.skip_history:
                self.update_history(state, action, action_filter)
                self.update_transition(state, REWARD_INVALID_DECISION)
            action, value = ask_network()

        if not self.skip_history:
            self.update_history(state, action, action_filter)

        drop_tile = Tile.convert_tile_index(action)
        summary = "%s [%s] chooses to drop %s. [%.2f]" % (self.player_name, display_name, drop_tile.symbol, value)
        self.print_msg(summary)
        self.end_decision(True)

        if game.lang_code is not None:
            template = get_text(game.lang_code, "NOTI_CHOOSE_DISCARD")
            game.add_notification(template % (
                self.player_name, drop_tile.get_display_name(game.lang_code, is_short=False)))

        return drop_tile
Example #5
0
    def decide_chow(self, player, new_tile, choices, neighbors, game):
        """Decide whether to claim a discarded tile to form a Chow.

        The Q-network chooses between the suit-specific ``<suit>_chow`` action
        and ``no_action``; which of the available ``choices`` is used for the
        Chow is then picked at random.  While training, an invalid choice is
        recorded, penalised with ``REWARD_INVALID_DECISION`` and the network
        is asked again; outside training an invalid choice is replaced by a
        random valid action.

        Args:
            player: the deciding player; its ``fixed_hand`` and ``hand`` are read.
            new_tile: the tile that was just discarded.
            choices: the available Chow options; one is drawn with
                ``random.choice``.
            neighbors: passed through to the state encoder and board printer.
            game: the running game; receives a notification when its
                ``lang_code`` is set and a Chow is formed.

        Returns:
            ``(False, None)`` when declining, otherwise ``(True, choice)``.
        """
        self.begin_decision()

        fixed_hand, hand = player.fixed_hand, player.hand

        if self.display_step:
            self.print_game_board(fixed_hand, hand, neighbors, game)

        self.print_msg("Someone just discarded a %s." % new_tile.symbol)

        q_network = get_Network(self.q_network_path)
        state = utils.dnn_encode_state(player, neighbors)

        # Store the pending transition in the network:
        # (state(t-1), action(t-1), reward(not terminated), state(t)).
        if not self.skip_history and self.history_waiting:
            self.update_transition(state, REWARD_NON_TERMINAL)

        chow_action = 34 + decisions_.index("%s_chow" % new_tile.suit)
        no_action = 34 + decisions_.index("no_action")
        valid_actions = [chow_action, no_action]
        action_filter = np.zeros(n_decisions)
        action_filter[valid_actions] = 1

        def ask_network():
            # Explore only while training; filter strictly otherwise.
            return q_network.choose_action(state, action_filter=action_filter, eps_greedy=self.is_train,
                                           return_value=True, strict_filter=not self.is_train)

        # Choose the action, retrying (training) or falling back to a random
        # valid action (evaluation) when the network picks an invalid one.
        action, value = ask_network()
        while action not in valid_actions:
            if not self.is_train:
                action = random.choice(valid_actions)
                break
            if action is not None and not self.skip_history:
                self.update_history(state, action, action_filter)
                self.update_transition(state, REWARD_INVALID_DECISION)
            action, value = ask_network()

        if not self.skip_history:
            self.update_history(state, action, action_filter)

        self.end_decision()

        # Report the choice that was taken.
        if action == no_action:
            self.print_msg("%s chooses not to Chow %s [%.2f]." % (self.player_name, new_tile.symbol, value))
            return False, None

        choice = random.choice(choices)
        symbols = []
        chow_tiles_tgstrs = []
        for offset in range(choice - 1, choice + 2):
            neighbor_tile = new_tile.generate_neighbor_tile(offset)
            symbols.append(neighbor_tile.symbol)
            chow_tiles_tgstrs.append(neighbor_tile.get_display_name(game.lang_code, is_short=False))
        chow_tiles_str = "".join(symbols)

        self.print_msg("%s chooses to Chow %s [%.2f]." % (self.player_name, chow_tiles_str, value))

        if game.lang_code is not None:
            template = get_text(game.lang_code, "NOTI_CHOOSE_CHOW")
            game.add_notification(template % (self.player_name, ",".join(chow_tiles_tgstrs)))

        return True, choice