def _fast_match(nnet1: NNet, nnet2: NNet) -> int:
    """Play a single game between two networks and report the outcome.

    On every turn the network belonging to the current player (``nnet1``
    when ``game.player == 1``, otherwise ``nnet2``) is asked for a policy,
    and the move is sampled at random with the policy values as weights.

    Args:
        nnet1: Network consulted when ``game.player`` equals 1.
        nnet2: Network consulted for any other ``game.player`` value.

    Returns:
        ``game.player * -1`` (evaluated after the deciding move) when that
        player has won, or ``0`` when the game is a draw.
    """
    game = Game()
    while True:
        # Pick whichever network is responsible for the side to move.
        active_net = nnet1 if game.player == 1 else nnet2
        move_weights = active_net.prediction(
            state=game.game_state, player=game.player
        )[0]

        # Sample a move index proportionally to the policy weights.
        move_indices = range(len(move_weights))
        chosen_move = random.choices(move_indices, weights=move_weights)[0]
        game.make_move(chosen_move)

        # NOTE(review): presumably make_move flips game.player, so the
        # negated value identifies the side that just moved - confirm.
        last_mover = game.player * -1
        if game.has_won(game.game_state, player=last_mover):
            return last_mover
        if game.is_draw(game.game_state):
            return 0
def fetch_prediction(self, nnet: NNet, x_noise: float = 0.) -> None:
    """Query ``nnet`` for this object's policy and value, optionally noised.

    The outputs of ``nnet.prediction(self.state, player=1)`` are stored in
    ``self.policy`` and ``self.nnet_value``. When ``x_noise`` is non-zero,
    the policy is blended with one sample from a Dirichlet distribution
    with all concentration parameters equal to 1.0 over ``c.COLUMNS``
    entries::

        policy = (1 - x_noise) * policy + x_noise * noise

    Args:
        nnet: Network whose ``prediction`` call returns a
            ``(policy, value)`` pair.
        x_noise: Weight given to the Dirichlet sample in the blend. A
            falsy value (the default ``0.``) leaves the policy untouched.
    """
    self.policy, self.nnet_value = nnet.prediction(self.state, player=1)
    if not x_noise:
        return

    # assumes self.policy is an array-like supporting scalar multiplication
    # and has c.COLUMNS entries - TODO confirm against NNet.prediction.
    noise = scipy.stats.dirichlet.rvs([1.0] * c.COLUMNS)[0]

    # Weight the noise by x_noise (previously a constant 1): the two
    # coefficients now sum to 1, so a policy that sums to 1 still does
    # after blending instead of summing to 2 - x_noise.
    self.policy = (1 - x_noise) * self.policy + x_noise * noise