def get_all_actions(self, state=None):
    """Return every action available in a position.

    Args:
        state: Optional external state accepted by ``u.encode_state``. When
            it is ``None`` the environment's own ``current_state`` and
            ``current_turn`` are used instead.

    Returns:
        Whatever ``u.get_all_actions`` produces for the resolved board and
        side to move.
    """
    if state is None:
        board, side = self.current_state, self.current_turn
    else:
        board, side = u.encode_state(state)

    # NOTE: a per-position cache (keyed through build_cache_key and stored in
    # action_cache) used to wrap this call; it is currently disabled.
    return u.get_all_actions(board, side)
def is_over(self, state):
    """Tell whether the game has ended in ``state``.

    The game counts as over when fewer than two king pieces are found on
    the board. Results are memoised in ``self.over_cache`` when
    ``self.use_cache`` is truthy.

    Args:
        state: External state accepted by ``u.encode_state``.

    Returns:
        ``False`` if at least two kings are present, ``True`` otherwise.
    """
    board, side = u.encode_state(state)
    # The key is built even when caching is off, exactly as before.
    key = self.build_cache_key(board, side)
    if self.use_cache and key in self.over_cache:
        return self.over_cache[key]

    # Lazy stream of king pieces; empty cells are stored as 0 and the piece
    # kind is read from the second character of the piece code.
    kings = (
        cell
        for row in board
        for cell in row
        if cell != 0 and int(cell[1]) == c.KING
    )

    finished = True
    for seen, _ in enumerate(kings, start=1):
        if seen == 2:
            # Both kings found: stop scanning the rest of the board.
            finished = False
            break

    if self.use_cache:
        self.over_cache[key] = finished
    return finished
def simulate(self, state, action, return_info=True):
    """Apply ``action`` to ``state`` and return the resulting position.

    Works on the board produced by ``u.encode_state(state)``; the
    environment's own ``current_state`` is not read or written here.

    Args:
        state: External state accepted by ``u.encode_state``.
        action: Mapping with the keys ``'to_x'``, ``'to_y'``, ``'from_x'``
            and ``'from_y'``.
        return_info: When truthy, also return an info dict.

    Returns:
        The decoded next state, or ``(next_state, info)`` when
        ``return_info`` is truthy. ``info`` has the keys ``"is_game_over"``
        and ``"reward"``: capturing a king gives reward ``1.`` and marks the
        game as over; any other positive capture reward is divided by twice
        the king's reward value; otherwise the reward is left unchanged
        (``0`` when nothing is captured).
    """
    board, mover = u.encode_state(state)

    # NOTE: caching of simulation results (via build_cache_key and
    # simulation_cache) used to happen here; it is currently disabled.

    # The side to move flips after the action.
    if mover == c.BLUE:
        next_side = c.RED
    else:
        next_side = c.BLUE

    to_x, to_y = action['to_x'], action['to_y']
    from_x, from_y = action['from_x'], action['from_y']

    # A non-empty destination is a capture, rewarded according to the piece
    # kind encoded in the second character of the piece code.
    captured = board[to_y][to_x]
    reward = c.REWARD_LIST[int(captured[1])] if captured != 0 else 0

    # Move the piece and leave the origin square empty.
    board[to_y][to_x] = board[from_y][from_x]
    board[from_y][from_x] = 0

    next_state = u.decode_state(board, next_side)
    if not return_info:
        return next_state

    king_captured = False
    if reward > 0:
        if reward == c.REWARD_LIST[c.KING]:
            reward = 1.
            king_captured = True
        else:
            # Normalised by the king's value (an earlier variant used the
            # CAR value instead).
            reward /= (c.REWARD_LIST[c.KING] * 2)

    return next_state, {"is_game_over": king_captured, "reward": reward}
def encode_state(self, state):
    """Convert ``state`` with ``u.encode_state`` and return the result.

    Other methods of this class unpack that result as ``(state, turn)``.
    """
    encoded = u.encode_state(state)
    return encoded
def print_env(self, is_check=False, is_checkmate=False, to_x=10, to_y=10, done=False, is_draw=False, state=None):
    """Print a text rendering of a board position to stdout.

    Without ``state`` the live game (``current_state`` / ``current_turn``)
    is rendered together with the score line and the status messages. With
    ``state`` the given position is rendered without them, and only if
    ``self.print_mcts_history`` is truthy.

    Args:
        is_check: Print "Check!!" (live game only).
        is_checkmate: Print "Checkmate!!" (live game only).
        to_x: Column of the cell to render with the "moved" piece map.
        to_y: Row of the cell to render with the "moved" piece map.
            The defaults of 10 presumably lie outside the board so that
            nothing is highlighted -- TODO confirm.
        done: Print the winner line (live game only).
        is_draw: Print "draw!!" (live game only).
        state: Optional external state accepted by ``u.encode_state``.

    Returns:
        None.
    """
    # NOTE(review): the nesting of the status checks near the end of this
    # method should be confirmed against version control history.
    if state is None:
        # No explicit state: render the live game.
        by_mcts = False
        state = self.current_state
        turn = self.current_turn
    else:
        # An explicit state is only printed when MCTS history printing is on.
        if not self.print_mcts_history:
            return
        by_mcts = True
        state, turn = u.encode_state(state)
    # Optional delay between printed boards.
    if self.interval > 0:
        time.sleep(self.interval)
    if turn == c.BLUE:
        print("%s %s : %d" % ("BLUE", "Turn", self.current_step))
    else:
        print("%s %s : %d" % ("RED", "Turn", self.current_step))
    if not by_mcts:
        print("Score [ BLUE : %f ] [ RED : %f ]" % (self.blue_score, self.red_score))
    # Choose the plain or the coloured piece renderings.
    if self.use_color_print:
        piece_map = KoreanChessV1.PIECE_MAP_COLOR
        piece_map_moved = KoreanChessV1.PIECE_MAP_COLOR_MOVED
    else:
        piece_map = KoreanChessV1.PIECE_MAP_KOR
        piece_map_moved = KoreanChessV1.PIECE_MAP_KOR_MOVED
    # Column header: indices 0-8 joined by the piece-map entry for value 0.
    print(" " + piece_map[0].join(["%d" % col_idx for col_idx in range(0, 9)]) + " X")
    for i, line in enumerate(state):
        if to_y == i:
            # Only the cell at (to_x, to_y) uses the "moved" rendering.
            line = [
                piece_map_moved[piece] if j == to_x else piece_map[piece]
                for j, piece in enumerate(line)
            ]
        else:
            line = [piece_map[piece] for piece in line]
        print("%d %s" % (i, ' '.join(line)))
    print("Y")
    # Status messages are printed for the live game only.
    if not by_mcts:
        if is_check:
            print("Check!!")
        if is_checkmate:
            print("Checkmate!!")
        if done:
            # The winner is reported from next_turn.
            if self.next_turn == c.BLUE:
                print("BLUE WIN")
            else:
                print("RED WIN")
        if is_draw:
            print("draw!!")
        if self.current_step >= self.limit_step:
            print("")
    print('======================================================')
def reset(self):
    """Restore default settings, apply ``self.properties`` and start a new game.

    Settings are first set to built-in defaults and then overridden by any
    matching key in ``self.properties``. The starting position is taken from
    ``properties["init_state"]`` when present; otherwise the default board is
    copied and one layout per side is placed on it, chosen at random unless
    ``properties["position_type"]`` supplies the layout indices.

    Returns:
        The result of ``u.decode_state`` for the new ``current_state`` and
        ``current_turn``.

    Raises:
        Exception: If the ``POSITION_TYPE_LIST`` entry selected for a side
            is falsy.
    """
    # Built-in defaults.
    self.interval = 0
    self.use_check = True
    self.limit_repeat = 4
    self.limit_step = 200
    self.max_reward = 1
    self.print_mcts_history = False
    self.use_color_print = False
    self.action_history = []

    props = self.properties
    if props:
        # Each of these keys, when present, overrides the attribute of the
        # same name. use_cache and validate_action get no default here.
        overridable = (
            "interval",
            "use_check",
            "limit_step",
            "max_reward",
            "limit_repeat",
            "print_mcts_history",
            "use_color_print",
            "use_cache",
            "validate_action",
        )
        for name in overridable:
            if name in props:
                setattr(self, name, props[name])

    repeat = self.limit_repeat
    self.limit_action_history = repeat + (repeat - 2)

    if props and "init_state" in props:
        # Explicit starting position: the side to move comes from the state.
        board, side = u.encode_state(props["init_state"])
        self.current_state = board
        self.current_turn = side
        if side == c.BLUE:
            self.next_turn = c.RED
        else:
            self.next_turn = c.BLUE
    else:
        if not props or "position_type" not in props or props['position_type'] == 'random':
            # Random layout index for blue first, then for red.
            position_type_list = [random.randint(0, 3), random.randint(0, 3)]
        else:
            position_type_list = props['position_type']

        # BLUE moves first.
        self.current_turn = c.BLUE
        self.next_turn = c.RED

        board = copy.deepcopy(KoreanChessV1.default_state)
        for side_idx, position_type in enumerate(position_type_list):
            layout = KoreanChessV1.POSITION_TYPE_LIST[position_type]
            if not layout:
                raise Exception('position_type is invalid : ' + str(position_type))
            # The first entry fills the last board row, any later entry the
            # first row.
            if side_idx == 0:
                row = -1
            else:
                row = 0
            board[row] = copy.deepcopy(layout[side_idx])
        self.current_state = board

    # NOTE(review): blue_score uses current_turn and red_score uses next_turn;
    # with an init_state where RED moves first these look swapped -- confirm
    # against c.get_score.
    self.blue_score = c.get_score(self.current_state, self.current_turn)
    self.red_score = c.get_score(self.current_state, self.next_turn)
    self.current_step = 0

    self.print_env()
    return u.decode_state(self.current_state, self.current_turn)