Example #1
    def pick_action(self, value_fn: ActionValueFunction, X_s, O_s, iteration):
        """Pick an action using a UCB1-style exploration bonus
        (assumes iteration >= 1 so np.log is defined)."""
        hashed_state = Game.hash_state(X_s, O_s)

        # Lazily initialise per-state counters on first visit.
        if hashed_state not in self.counter:
            self.counter[hashed_state] = np.ones(X_s.shape)
            self.last_visited[hashed_state] = np.ones(X_s.shape)

        occupied = np.logical_or(X_s, O_s)

        exploit_values = value_fn.get_action_values(
            X_s, O_s).to('cpu').detach().numpy().squeeze()
        counters = self.counter[hashed_state]
        # UCB1 exploration term: sqrt(log(t) / n) per cell.
        explore_values = np.sqrt(np.log(iteration) / counters)
        combined = exploit_values + self.c * explore_values

        # Mask occupied cells so they can never be selected.
        combined[occupied] = -10000
        max_value = np.max(combined)
        # Break ties between equally valued cells at random.
        indices = np.argwhere(combined == max_value)
        index = np.random.choice(indices.shape[0])
        (y, x) = indices[index, :]
        self.counter[hashed_state][y, x] += 1
        self.last_visited[hashed_state][y, x] = iteration
        return Position(x, y), exploit_values[y, x]
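For intuition, a minimal standalone sketch (toy numbers, not from the original class) of the UCB1-style bonus this picker adds: rarely tried cells get a large bonus that shrinks as they accumulate visits.

import numpy as np

# 3x3 visit counts after a few plays; assumes iteration t = 10.
counters = np.array([[1., 1., 4.],
                     [2., 1., 1.],
                     [1., 3., 1.]])
bonus = np.sqrt(np.log(10) / counters)
print(bonus.round(2))
# Cells tried once get the largest bonus (~1.52); the cell tried
# four times gets the smallest (~0.76).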
Example #2
    def pick_action(self, value_fn: ActionValueFunction, X_s, O_s, iteration):
        """Pick an action epsilon-greedily: explore with probability
        self.epsilon, otherwise act greedily on the value function."""
        hashed_state = Game.hash_state(X_s, O_s)

        if hashed_state not in self.times_visited:
            self.times_visited[hashed_state] = np.zeros(X_s.shape)

        # With probability epsilon, play a uniformly random legal move.
        if np.random.random() < self.epsilon:
            self.random_actions += 1
            random_action = self.random_action(X_s, O_s)
            self.times_visited[hashed_state][random_action.y,
                                             random_action.x] += 1
            return random_action, value_fn.get_action_value(
                X_s, O_s, random_action)

        values = value_fn.get_action_values(X_s, O_s)
        # Mask occupied cells so they can never be selected.
        occupied = np.logical_or(X_s, O_s)
        values[occupied] = -10000
        max_value = np.max(values)
        # Break ties between equally valued cells at random.
        indices = np.argwhere(values == max_value)
        index = np.random.choice(indices.shape[0])
        (y, x) = indices[index, :]
        self.times_visited[hashed_state][y, x] += 1
        return Position(x, y), max_value
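A quick standalone check (toy epsilon, not from the original class) of the explore/exploit split this picker implements:

import numpy as np

epsilon, trials = 0.1, 10_000  # assumed epsilon of 0.1
rng = np.random.default_rng(0)
random_picks = int(np.sum(rng.random(trials) < epsilon))
print(random_picks / trials)  # ~0.1: roughly one action in ten is random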
Example #3
    def pick_action(self, value_fn: ActionValueFunction, X_s, O_s, iteration):
        """Pick an action by mixing exploitation values with a
        recency-decayed visit-count exploration bonus."""
        hashed_state = Game.hash_state(X_s, O_s)

        if hashed_state not in self.counter:
            self.counter[hashed_state] = np.ones(X_s.shape)
            self.times_visited[hashed_state] = np.zeros(X_s.shape)
            self.last_visited[hashed_state] = np.ones(X_s.shape)

        occupied = np.logical_or(X_s, O_s)

        exploit_values = value_fn.get_action_values(X_s, O_s)
        # Decay counts by how long ago each cell was last visited; the
        # small constant keeps the division below well defined.
        decay = self.lambd**(iteration - self.last_visited[hashed_state])
        decay = decay + 1e-6
        counters = self.counter[hashed_state] * decay
        # Normalise by the mean decayed count (ignoring untouched cells).
        mean = np.mean(counters[counters != 1])
        explore_values = mean / counters
        # self.comma weights exploitation against exploration.
        linear_combination = (
            1 - self.comma) * explore_values + self.comma * exploit_values

        # Mask occupied cells so they can never be selected.
        linear_combination[occupied] = -10000
        max_value = np.max(linear_combination)
        # Break ties between equally valued cells at random.
        indices = np.argwhere(linear_combination == max_value)
        index = np.random.choice(indices.shape[0])
        (y, x) = indices[index, :]
        self.counter[hashed_state][y, x] += 1
        self.times_visited[hashed_state][y, x] += 1
        self.last_visited[hashed_state][y, x] = iteration
        return Position(x, y), exploit_values[y, x]
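To see what the recency decay does, a minimal sketch with an assumed lambd of 0.99 (toy numbers, not from the original class):

import numpy as np

lambd = 0.99
iteration = 100
last_visited = np.array([99, 50, 1])  # just visited ... visited long ago
decay = lambd ** (iteration - last_visited) + 1e-6
print(decay.round(3))  # [0.99  0.605 0.37]
# Visits fade with age, so long-unvisited cells regain exploration value.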
Example #4
    def annotate_map(self, name, save=False, show=True):
        """Annotate the map of the zone `name`, optionally saving the
        modified asset file and its PNG preview.

        Saves are made both to the TexTools folder for easy import and to
        the map project folder for repo updates."""
        map_layer = Image.open(self._get_path(name, backup=True))

        # Markers go on their own transparent layer so the drop shadow can
        # be applied to them independently of the base map.
        marker_layer = Image.new("RGBA", map_layer.size, color=(0, 0, 0, 0))

        scale = self._zones[name]["scale"]
        zone_marks = self._get_zone_marks(name, True)
        # Group marks by spawn point so shared spots get a single marker.
        spawns = defaultdict(dict)
        for mark, (rank, spots) in zone_marks.items():
            for p in spots:
                spawns[tuple(p)][mark] = rank
        for spawn, marks in spawns.items():
            screen_position = Position(m2c(spawn[0], scale),
                                       m2c(spawn[1], scale))
            self._draw_marker(marker_layer, screen_position, marks)

        marker_layer = drop_shadow(
            marker_layer,
            offset=Position(*self._config["marker"]["shadow_offset"]),
            shadow_color=self._config["marker"]["shadow_color"],
            iterations=self._config["marker"]["shadow_iterations"],
            scale=self._config["marker"]["shadow_scale"],
            direction=self._config["marker"]["shadow_direction"],
        )

        # Composite the markers over the base map, then draw the legend.
        marks = {mark: rank for mark, (rank, _) in zone_marks.items()}
        new_map = Image.alpha_composite(map_layer, marker_layer)
        legend_rows = self._zones[name]["legend"]["rows"]
        legend_position = Position(*self._zones[name]["legend"]["position"])
        complete_map = self._draw_legend(new_map, marks, legend_rows,
                                         legend_position)

        if save:
            self._save_map(complete_map, name)
        if self._iscli and show:
            complete_map.show(title=name)
            return
        return complete_map
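The layering order is what makes this work: markers are drawn on a transparent RGBA layer, shadowed, composited over the map, and only then is the legend added. A minimal Pillow sketch of the same compositing pattern (stand-in images, none of the original helpers):

from PIL import Image, ImageDraw

base = Image.new("RGBA", (64, 64), (40, 120, 40, 255))  # stand-in map
overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))    # transparent layer
ImageDraw.Draw(overlay).ellipse((24, 24, 40, 40), fill=(255, 0, 0, 255))

# Image.alpha_composite needs two RGBA images of the same size.
combined = Image.alpha_composite(base, overlay)
combined.save("annotated.png")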
Example #5
def get_hashes_for_actions(X_s, O_s):
    """Compute a hash for every legal action; occupied cells get an
    all-zero sentinel hash."""
    hashes = np.empty(X_s.shape, 'object')
    # Encode each cell in one array: 2 = X, 1 = O, 0 = empty.
    combined = X_s * 2 + O_s
    it = np.nditer(combined, flags=['multi_index'])
    for x in it:
        (row, col) = it.multi_index
        if x == 0:
            p = Position(col, row)
            hashes[row, col] = get_hash_for_action(X_s, O_s, p)
        else:
            hashes[row, col] = '000000000'
    return hashes
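The iteration pattern above in isolation: np.nditer with the multi_index flag visits every cell while exposing its (row, col) coordinates, which is what lets the function map board cells to Position objects. A toy run:

import numpy as np

board = np.array([[0, 2],
                  [1, 0]])
it = np.nditer(board, flags=['multi_index'])
for cell in it:
    print(it.multi_index, int(cell))
# (0, 0) 0
# (0, 1) 2
# (1, 0) 1
# (1, 1) 0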
Example #6
    def pick_action(self, value_fn: ActionValueFunction, X_s, O_s, iteration):
        """Pick the greedy action: the legal cell with the highest value."""
        values = value_fn.get_action_values(
            X_s, O_s).to('cpu').detach().numpy().squeeze()
        # Mask occupied cells so they can never be selected.
        occupied = np.logical_or(X_s, O_s)
        values[occupied] = -10000
        max_value = np.max(values)
        # Break ties between equally valued cells at random.
        indices = np.argwhere(values == max_value)
        index = np.random.choice(indices.shape[0])
        (y, x) = indices[index, :]
        return Position(x, y), max_value
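The tie-breaking step deserves a note: taking the first argmax would always favour the top-left of the board, so these pickers collect all maxima with np.argwhere and choose one uniformly. A standalone sketch with toy values:

import numpy as np

values = np.array([[0.5, 0.9, 0.1],
                   [0.9, 0.2, 0.9],
                   [0.0, 0.3, 0.4]])
indices = np.argwhere(values == values.max())  # [[0 1] [1 0] [1 2]]
y, x = indices[np.random.choice(indices.shape[0])]
print(y, x)  # one of the three maxima, chosen uniformly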
Example #7
    def pick_action(self, value_fn: ActionValueFunction, X_s, O_s, iteration):
        """Ask a human player for a move, reprompting until it is legal."""
        p = None
        has_picked_legal_move = False

        while not has_picked_legal_move:
            print("board: ")
            Game.print_game_state(X_s, O_s)
            player_input = input("Your move:")  # e.g. "1,1" or "2,0"

            x, y = player_input.split(",")
            p = Position(int(x), int(y))

            has_picked_legal_move = self.is_move_legal(X_s, O_s, p)

        return p, 0
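The split-and-int parsing above raises on malformed input (e.g. "12" or "a,b"), which aborts the game loop. A hypothetical defensive variant (not in the original; parse_move is an invented helper, Position is the class used throughout these examples):

def parse_move(player_input):
    # Return a Position, or None if the input is not "x,y" with integers.
    parts = player_input.split(",")
    if len(parts) != 2:
        return None
    try:
        x, y = int(parts[0]), int(parts[1])
    except ValueError:
        return None
    return Position(x, y)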
Example #8
    def pick_action(self, value_fn, X_s, O_s, iteration):
        """Pick a uniformly random legal action."""
        random_values = np.random.rand(*X_s.shape)
        # Mask occupied cells (with the same sentinel as random_action)
        # so argmax can only land on a legal cell.
        occupied = np.logical_or(X_s, O_s)
        random_values[occupied] = -10000
        (y, x) = np.unravel_index(random_values.argmax(),
                                  random_values.shape)
        return Position(x, y), random_values[y, x]

    def random_action(self, X_s, O_s):
        """Return a uniformly random legal position."""
        random_values = np.random.rand(*X_s.shape)
        occupied = np.logical_or(X_s, O_s)
        random_values[occupied] = -10000
        (y, x) = np.unravel_index(random_values.argmax(),
                                  random_values.shape)
        return Position(x, y)
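Both helpers rely on the same flat-index trick: argmax on a 2-D array returns a flattened position, and np.unravel_index converts it back to (row, col). In isolation:

import numpy as np

grid = np.array([[0.1, 0.7],
                 [0.9, 0.3]])
flat = grid.argmax()                       # 2
y, x = np.unravel_index(flat, grid.shape)  # (1, 0)
print(y, x, grid[y, x])                    # 1 0 0.9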