Beispiel #1
0
    def acts(self, states):
        """Sample one legal move for each state from the network's policy.

        Every state is encoded with ``Translator.encode_board`` and the
        encoded boards are evaluated by ``self.net`` in a single batch. The
        first element of the network output is used as the policy; for each
        state it is restricted to the indices of that state's
        ``micro_legal_moves``, renormalised, and one index is sampled from
        the resulting categorical distribution.

        Args:
            states: Sequence of game states (must support ``len``). Each
                state must expose ``micro_legal_moves``.

        Returns:
            A list with one move per state, in input order, as produced by
            ``Translator.decode_move``. An empty ``states`` yields ``[]``.
        """
        # Nothing to evaluate: batching zero boards would raise, so answer
        # with an empty move list instead.
        if len(states) == 0:
            return []

        # Batch the encoded boards along a new leading dimension.
        obs = torch.stack([Translator.encode_board(s) for s in states])

        # Per state: policy indices of its legal moves.
        valids = [
            torch.tensor(
                [Translator.encode_move_idx(m) for m in s.micro_legal_moves],
                dtype=torch.long)
            for s in states
        ]

        # Only sampled integer indices leave this method, so no autograd
        # graph is needed for the forward pass.
        # NOTE(review): `forward` is called directly, as in the original; if
        # `self.net` is an nn.Module, calling `self.net(obs)` would also run
        # registered hooks -- confirm which is intended.
        with torch.no_grad():
            policy = self.net.forward(obs)[0]

        idxs = []
        for row, legal in zip(policy, valids):
            # Keep only the legal entries and renormalise them to sum to 1.
            # NOTE(review): presumes the policy entries are non-negative
            # probabilities (not logits) -- confirm against the network.
            masked = row[legal]
            masked = masked / masked.sum()
            # The sample is a position within `legal`; map it back to the
            # corresponding policy index.
            idxs.append(legal[dist.Categorical(masked).sample()])

        return [
            Translator.decode_move(idx.item(), state)
            for idx, state in zip(idxs, states)
        ]