def pick_action(self, value_fn: ActionValueFunction, X_s, O_s, iteration):
    """Pick a move with a UCB1-style rule.

    Combines the value function's exploitation estimate with an
    exploration bonus `c * sqrt(log(iteration) / visit_count)` that
    shrinks as a cell is visited more often.

    Args:
        value_fn: provides per-cell action values for the current state.
        X_s, O_s: boolean/0-1 boards for the X and O pieces.
        iteration: global iteration counter used for the exploration term.

    Returns:
        (Position, exploitation value of the chosen cell).
    """
    hashed_state = Game.hash_state(X_s, O_s)
    if hashed_state not in self.counter:
        # Counts start at 1 so the exploration term is finite on first visit.
        self.counter[hashed_state] = np.ones(X_s.shape)
        self.last_visited[hashed_state] = np.ones(X_s.shape)
    occupied = np.logical_or(X_s, O_s)
    exploit_values = value_fn.get_action_values(
        X_s, O_s).to('cpu').detach().numpy().squeeze()
    counters = self.counter[hashed_state]
    # Guard against log(0) = -inf at iteration 0, which would turn every
    # bonus into NaN and make the argwhere/choice below blow up.
    explore_values = np.sqrt(np.log(max(iteration, 1)) / counters)
    combined = exploit_values + self.c * explore_values
    combined[occupied] = -10000  # never select an already-taken cell
    max_value = np.max(combined)
    # Break ties between equally-scored cells uniformly at random.
    indices = np.argwhere(combined == max_value)
    index = np.random.choice(indices.shape[0])
    (y, x) = indices[index, :]
    self.counter[hashed_state][y, x] += 1
    self.last_visited[hashed_state][y, x] = iteration
    return Position(x, y), exploit_values[y, x]
def pick_action(self, value_fn: ActionValueFunction, X_s, O_s, iteration):
    """Epsilon-greedy move selection.

    With probability `self.epsilon` plays a uniformly random legal move;
    otherwise plays the legal move with the highest predicted action
    value (ties broken uniformly at random).

    Returns:
        (Position, value estimate of the chosen move).
    """
    hashed_state = Game.hash_state(X_s, O_s)
    if hashed_state not in self.times_visited:
        self.times_visited[hashed_state] = np.zeros(X_s.shape)
    if np.random.random() < self.epsilon:
        # Explore: random legal move, book-kept for statistics.
        self.random_actions += 1
        random_action = self.random_action(X_s, O_s)
        self.times_visited[hashed_state][random_action.y, random_action.x] += 1
        return random_action, value_fn.get_action_value(
            X_s, O_s, random_action)
    # Exploit: convert the network output to a numpy array before masking,
    # consistent with the other pick_action implementations in this file
    # (previously the raw tensor was indexed/compared with numpy directly).
    values = value_fn.get_action_values(
        X_s, O_s).to('cpu').detach().numpy().squeeze()
    occupied = np.logical_or(X_s, O_s)
    values[occupied] = -10000  # never select an already-taken cell
    max_value = np.max(values)
    # Break ties between equally-scored cells uniformly at random.
    indices = np.argwhere(values == max_value)
    index = np.random.choice(indices.shape[0])
    (y, x) = indices[index, :]
    self.times_visited[hashed_state][y, x] += 1
    return Position(x, y), max_value
def pick_action(self, value_fn: ActionValueFunction, X_s, O_s, iteration):
    """Pick a move using a decayed-visit-count exploration bonus.

    The exploration score of a cell is the mean decayed visit count
    divided by the cell's own decayed count, so rarely/long-ago visited
    cells score higher. The final score is a linear blend controlled by
    `self.comma`: (1 - comma) * explore + comma * exploit.

    Returns (Position, exploitation value of the chosen cell).
    """
    hashed_state = Game.hash_state(X_s, O_s)
    if not (hashed_state in self.counter):
        # First visit to this state: counts start at 1, timestamps at 1.
        self.counter[hashed_state] = np.ones(X_s.shape)
        self.times_visited[hashed_state] = np.zeros(X_s.shape)
        self.last_visited[hashed_state] = np.ones(X_s.shape)
    filter = np.logical_or(X_s, O_s)
    # NOTE(review): unlike the sibling pick_action implementations, the
    # result is not moved via .to('cpu').detach().numpy() — confirm
    # get_action_values here already returns a numpy array.
    exploit_values = value_fn.get_action_values(X_s, O_s)
    # Exponential recency decay: cells not visited for a long time decay
    # towards 0; epsilon added to avoid division by zero below.
    decay = self.lambd**(iteration - self.last_visited[hashed_state])
    decay = decay + 0.000001
    counters = self.counter[hashed_state] * decay
    # NOTE(review): mean over counters != 1 is NaN if every entry equals
    # exactly 1 — confirm this is unreachable for the chosen lambd.
    mean = np.mean(counters[counters != 1])
    explore_values = mean / (self.counter[hashed_state] * decay)
    linear_combination = (
        1 - self.comma) * explore_values + self.comma * exploit_values
    linear_combination[filter] = -10000  # mask already-taken cells
    max_value = np.max(linear_combination)
    # Break ties between equally-scored cells uniformly at random.
    indices = np.argwhere(linear_combination == max_value)
    index = np.random.choice(indices.shape[0])
    (y, x) = indices[index, :]
    self.counter[hashed_state][y, x] += 1
    self.times_visited[hashed_state][y, x] += 1
    self.last_visited[hashed_state][y, x] = iteration
    return Position(x, y), exploit_values[y, x]
def annotate_map(self, name, save=False, show=True):
    """Annotate the map of the zone `name`.

    Draws spawn markers with a drop shadow onto the zone's map image and
    adds a legend. Optionally saves the modified asset file and its png
    preview — saves go both to the TexTools folder for easy import and
    to the map project folder for repo update. Returns the annotated
    image, or None when running from the CLI with `show` enabled (the
    image is displayed instead).
    """
    base = Image.open(self._get_path(name, backup=True))
    overlay = Image.new("RGBA", base.size, color=(0, 0, 0, 0))
    scale = self._zones[name]["scale"]
    zone_marks = self._get_zone_marks(name, True)
    # Group marks by spawn point so overlapping spawns share one marker.
    by_spot = defaultdict(dict)
    for mark_name, (rank, spots) in zone_marks.items():
        for spot in spots:
            by_spot[tuple(spot)][mark_name] = rank
    for spot, spot_marks in by_spot.items():
        screen_pos = Position(m2c(spot[0], scale), m2c(spot[1], scale))
        self._draw_marker(overlay, screen_pos, spot_marks)
    shadow_cfg = self._config["marker"]
    overlay = drop_shadow(
        overlay,
        offset=Position(*shadow_cfg["shadow_offset"]),
        shadow_color=shadow_cfg["shadow_color"],
        iterations=shadow_cfg["shadow_iterations"],
        scale=shadow_cfg["shadow_scale"],
        direction=shadow_cfg["shadow_direction"],
    )
    legend_marks = {
        mark_name: rank for mark_name, (rank, _) in zone_marks.items()
    }
    composited = Image.alpha_composite(base, overlay)
    legend_cfg = self._zones[name]["legend"]
    complete_map = self._draw_legend(
        composited,
        legend_marks,
        legend_cfg["rows"],
        Position(*legend_cfg["position"]),
    )
    if save:
        self._save_map(complete_map, name)
    if self._iscli and show:
        complete_map.show(title=name)
        return None
    return complete_map
def get_hashes_for_actions(X_s, O_s):
    """Build an object array of state hashes, one per board cell.

    Each empty cell gets the hash of the state that results from playing
    there; occupied cells get the placeholder string '000000000'.
    """
    hashes = np.empty(X_s.shape, 'object')
    # Encode occupancy in a single array: 0 marks a free cell.
    occupancy = X_s * 2 + O_s
    for (row, col), cell in np.ndenumerate(occupancy):
        if cell == 0:
            move = Position(col, row)
            hashes[row, col] = get_hash_for_action(X_s, O_s, move)
        else:
            hashes[row, col] = '000000000'
    return hashes
def pick_action(self, value_fn: ActionValueFunction, X_s, O_s, iteration):
    """Greedy selection: play the legal move with the highest predicted
    action value, breaking ties uniformly at random.

    Returns (Position, value of the chosen move).
    """
    scores = value_fn.get_action_values(
        X_s, O_s).to('cpu').detach().numpy().squeeze()
    # Mask cells that are already occupied by either player.
    occupied = np.logical_or(X_s, O_s)
    scores[occupied] = -10000
    best = np.max(scores)
    candidates = np.argwhere(scores == best)
    pick = np.random.choice(candidates.shape[0])
    y, x = candidates[pick, :]
    return Position(x, y), best
def pick_action(self, value_fn: ActionValueFunction, X_s, O_s, iteration):
    """Ask the human player for a move on stdin until a legal one is given.

    Expected input format: "x,y" (e.g. "1,1" or "2,0"). Malformed input
    (missing comma, non-integer coordinates) is rejected with a message
    and the player is re-prompted instead of crashing.

    Returns (Position, 0) — a human move carries no value estimate.
    """
    has_picked_legal_move = False
    p = None
    while not has_picked_legal_move:
        print("board: ")
        Game.print_game_state(X_s, O_s)
        player_input = input("Your move:")  # 1,1 or 2,0 etc.
        coords = player_input.split(",")
        try:
            p = Position(int(coords[0]), int(coords[1]))
        except (IndexError, ValueError):
            # Previously this crashed on bad input; re-prompt instead.
            print("Invalid input, expected two integers like: 1,2")
            continue
        has_picked_legal_move = self.is_move_legal(X_s, O_s, p)
    return p, 0
def pick_action(self, value_fn, X_s, O_s, iteration):
    """Play a uniformly random legal move.

    Draws random noise for every cell, masks occupied cells, and takes
    the argmax — each free cell wins with equal probability.

    Returns (Position, the noise value of the chosen cell).
    """
    random_values = np.random.rand(*X_s.shape)
    occupied = np.logical_or(X_s, O_s)
    # Mask with a large negative value (consistent with random_action).
    # Masking with 0, as before, could still select an occupied cell:
    # rand() is in [0, 1), so a free cell can draw exactly 0.0 and tie.
    random_values[occupied] = -10000
    (y, x) = np.unravel_index(random_values.argmax(), random_values.shape)
    return Position(x, y), random_values[y, x]
def random_action(self, X_s, O_s):
    """Return a uniformly random free Position on the board."""
    noise = np.random.rand(*X_s.shape)
    # Push occupied cells far below any possible noise value.
    noise[np.logical_or(X_s, O_s)] = -10000
    flat_index = noise.argmax()
    y, x = np.unravel_index(flat_index, noise.shape)
    return Position(x, y)