Exemple #1
0
    def get_all_actions(self, state=None):
        """Return the actions available in ``state``.

        Args:
            state: A state in the form accepted by ``u.encode_state``. When
                ``None``, the environment's own ``current_state`` and
                ``current_turn`` are used instead.

        Returns:
            Whatever ``u.get_all_actions`` produces for the resolved board
            and turn.
        """
        if state is None:
            board, turn = self.current_state, self.current_turn
        else:
            board, turn = u.encode_state(state)

        # NOTE: memoising the result in ``self.action_cache`` (keyed by
        # ``self.build_cache_key(board, turn)``) is currently switched off,
        # so the actions are recomputed on every call.
        return u.get_all_actions(board, turn)
Exemple #2
0
 def is_over(self, state):
     """Tell whether the game described by ``state`` has ended.

     The game counts as over when fewer than two king pieces are found on
     the board. When ``self.use_cache`` is set, answers are memoised in
     ``self.over_cache`` under the key built from the encoded board and
     turn.

     Args:
         state: A state in the form accepted by ``u.encode_state``.

     Returns:
         ``True`` if fewer than two kings remain, ``False`` otherwise.
     """
     board, turn = u.encode_state(state)
     key = self.build_cache_key(board, turn)
     if self.use_cache and key in self.over_cache:
         return self.over_cache[key]

     # Lazily yields one marker per king; empty cells are stored as 0 and
     # the piece kind is read from index 1 of a non-empty cell.
     kings = (
         True
         for row in board
         for cell in row
         if cell != 0 and int(cell[1]) == c.KING
     )
     # Pull at most two kings: scanning stops as soon as the second one is
     # found, and the game is over when either pull comes back empty.
     finished = next(kings, None) is None or next(kings, None) is None

     if self.use_cache:
         self.over_cache[key] = finished
     return finished
Exemple #3
0
    def simulate(self, state, action, return_info=True):
        """Apply ``action`` to ``state`` and return the resulting state.

        The environment's own current state is not read or modified; the
        move is played on the board obtained from ``u.encode_state(state)``.

        Args:
            state: A state in the form accepted by ``u.encode_state``.
            action: Mapping with the keys ``'from_x'``, ``'from_y'``,
                ``'to_x'`` and ``'to_y'`` describing the move.
            return_info: When true, also return a dict with the reward and
                the game-over flag.

        Returns:
            The decoded next state (with the turn handed to the other
            side), or a ``(next_state, info)`` tuple when ``return_info``
            is true. ``info`` has the keys ``"is_game_over"`` and
            ``"reward"``.
        """
        # NOTE: memoising results in ``self.simulation_cache`` is currently
        # switched off; every call replays the move.
        board, mover = u.encode_state(state)

        # The side to move in the resulting state is the opponent.
        if mover == c.BLUE:
            following_turn = c.RED
        else:
            following_turn = c.BLUE

        dst_x = action['to_x']
        dst_y = action['to_y']
        src_x = action['from_x']
        src_y = action['from_y']

        # A non-empty destination means a capture; the captured piece's kind
        # (index 1 of the cell) selects its reward.
        reward = 0
        if board[dst_y][dst_x] != 0:
            captured_kind = int(board[dst_y][dst_x][1])
            reward = c.REWARD_LIST[captured_kind]

        # Move the piece and vacate its origin.
        board[dst_y][dst_x] = board[src_y][src_x]
        board[src_y][src_x] = 0
        next_state = u.decode_state(board, following_turn)

        if not return_info:
            return next_state

        game_over = False
        if reward > 0:
            king_reward = c.REWARD_LIST[c.KING]
            if reward != king_reward:
                # Ordinary capture: scale by twice the king's reward.
                reward /= (king_reward * 2)
            else:
                # Capturing the king ends the game with the full reward.
                reward = 1.
                game_over = True

        return next_state, {"is_game_over": game_over, "reward": reward}
Exemple #4
0
 def encode_state(self, state):
     """Expose ``u.encode_state`` as a method of the environment.

     Args:
         state: The state to encode; passed to ``u.encode_state`` unchanged.

     Returns:
         The value returned by ``u.encode_state(state)``. Other methods of
         this class unpack that value as a ``(state, turn)`` pair.
     """
     return u.encode_state(state)
Exemple #5
0
    def print_env(self,
                  is_check=False,
                  is_checkmate=False,
                  to_x=10,
                  to_y=10,
                  done=False,
                  is_draw=False,
                  state=None):
        """Print the board and a status summary to standard output.

        Called without ``state``, the environment's own board is printed
        together with the scores and the check / win / draw messages.
        Called with ``state`` (the MCTS path), only the turn header and the
        board are printed, and nothing at all unless
        ``self.print_mcts_history`` is set.

        Args:
            is_check: Print "Check!!" (environment board only).
            is_checkmate: Print "Checkmate!!"; only honoured together with
                ``is_check``.
            to_x: Column of the cell drawn with the "moved" piece map.
            to_y: Row of the cell drawn with the "moved" piece map. The
                defaults of 10 are presumably off-board so that no cell is
                highlighted.
            done: Print the winner, chosen from ``self.next_turn``.
            is_draw: Print "draw!!".
            state: Optional state in the form accepted by
                ``u.encode_state`` to print instead of the current one.
        """
        by_mcts = state is not None
        if by_mcts:
            if not self.print_mcts_history:
                return
            board, turn = u.encode_state(state)
        else:
            board, turn = self.current_state, self.current_turn

        # Optional pause between frames.
        if self.interval > 0:
            time.sleep(self.interval)

        side_name = "BLUE" if turn == c.BLUE else "RED"
        print("%s %s : %d" % (side_name, "Turn", self.current_step))
        if not by_mcts:
            print("Score [ BLUE : %f ] [ RED : %f ]" %
                  (self.blue_score, self.red_score))

        if self.use_color_print:
            normal_map = KoreanChessV1.PIECE_MAP_COLOR
            moved_map = KoreanChessV1.PIECE_MAP_COLOR_MOVED
        else:
            normal_map = KoreanChessV1.PIECE_MAP_KOR
            moved_map = KoreanChessV1.PIECE_MAP_KOR_MOVED

        # Column header: the indices 0..8 separated by the empty-cell glyph.
        column_labels = ["%d" % col for col in range(0, 9)]
        print("  " + normal_map[0].join(column_labels) + "  X")

        for row_idx, row in enumerate(board):
            cells = [
                (moved_map if row_idx == to_y and col_idx == to_x
                 else normal_map)[piece]
                for col_idx, piece in enumerate(row)
            ]
            print("%d %s" % (row_idx, ' '.join(cells)))
        print("Y")

        if not by_mcts:
            if is_check:
                print("Check!!")
            if is_check and is_checkmate:
                print("Checkmate!!")
            if done:
                print("BLUE WIN" if self.next_turn == c.BLUE else "RED WIN")
            if is_draw:
                print("draw!!")

        # An extra blank line is emitted once the step limit is reached.
        if self.current_step >= self.limit_step:
            print("")

        print('======================================================')
Exemple #6
0
    def reset(self):
        """Reset the environment to its starting configuration.

        Restores the default settings, applies any overrides found in
        ``self.properties``, builds the starting board (either from
        ``properties["init_state"]`` or from the blue/red position types),
        initialises both scores and the step counter, prints the board and
        returns the decoded starting state.

        Returns:
            The result of ``u.decode_state(self.current_state,
            self.current_turn)``.

        Raises:
            Exception: If a requested position type maps to a falsy entry
                in ``KoreanChessV1.POSITION_TYPE_LIST``.
        """
        # Defaults; each may be overridden through ``self.properties``.
        self.interval = 0
        self.use_check = True
        self.limit_repeat = 4
        self.limit_step = 200
        self.max_reward = 1
        self.print_mcts_history = False
        self.use_color_print = False
        self.action_history = []

        # Treat a missing/empty properties object as "no overrides".
        properties = self.properties or {}

        # ``use_cache`` and ``validate_action`` get no default here: when
        # not overridden they keep whatever value they already had.
        overridable = (
            "interval",
            "use_check",
            "limit_step",
            "max_reward",
            "limit_repeat",
            "print_mcts_history",
            "use_color_print",
            "use_cache",
            "validate_action",
        )
        for name in overridable:
            if name in properties:
                setattr(self, name, properties[name])

        self.limit_action_history = self.limit_repeat + (self.limit_repeat - 2)

        if "init_state" in properties:
            # Start from a caller-supplied position; the side to move comes
            # from the encoded state and may be either colour.
            self.current_state, self.current_turn = u.encode_state(
                properties["init_state"])
            self.next_turn = c.RED if self.current_turn == c.BLUE else c.BLUE
        else:
            if ("position_type" not in properties
                    or properties['position_type'] == 'random'):
                # Random formation for each side (blue is drawn first).
                blue_rand_position = random.randint(0, 3)
                red_rand_position = random.randint(0, 3)
                position_type_list = [blue_rand_position, red_rand_position]
            else:
                position_type_list = properties['position_type']

            # Blue always moves first from the default set-up.
            self.current_turn = c.BLUE
            self.next_turn = c.RED

            current_state = copy.deepcopy(KoreanChessV1.default_state)
            for i, position_type in enumerate(position_type_list):
                if not KoreanChessV1.POSITION_TYPE_LIST[position_type]:
                    raise Exception('position_type is invalid : ' +
                                    str(position_type))

                # The first entry (blue) fills the last board row, the
                # second entry (red) fills the first row.
                line_idx = -1 if i == 0 else 0

                current_state[line_idx] = copy.deepcopy(
                    KoreanChessV1.POSITION_TYPE_LIST[position_type][i])
            self.current_state = current_state

        # Score each side by its own colour rather than by whose turn it is:
        # with an ``init_state`` in which RED is to move, scoring by
        # current/next turn would store RED's score in ``blue_score`` and
        # vice versa. Identical to the previous behaviour whenever BLUE
        # moves first.
        # NOTE(review): assumes the second argument of ``c.get_score``
        # selects the side being scored - confirm against its definition.
        self.blue_score = c.get_score(self.current_state, c.BLUE)
        self.red_score = c.get_score(self.current_state, c.RED)
        self.current_step = 0

        # Show the starting position.
        self.print_env()

        return u.decode_state(self.current_state, self.current_turn)