def __call__(self, p1, p2):
    """ Play self.maxGames games between p1 and p2 and return the weighted
    average of the per-game results.

    When self.useNetworks is true, both arguments are wrapped in a
    ModuleDecidingPlayer on self.task.env, created with the starting
    temperature. Otherwise both must already be GomokuPlayer instances and
    are simply attached to self.task.env. p1 always gets GomokuGame.BLACK,
    p2 the negated color.

    Each game's outcome (self._oneGame()) is weighted by
    1 / (10 * self.temp + 1), so the weight shrinks as self.temp grows.
    self._globalWarming() is invoked after every game except the first
    (presumably it raises self.temp -- confirm in its definition).

    Raises AssertionError if networks are not used and a player is not a
    GomokuPlayer; raises ZeroDivisionError if self.maxGames is 0, because
    the weight sum then stays at zero. """
    # every evaluation restarts from the minimal temperature
    self.temp = self.minTemperature

    if not self.useNetworks:
        assert isinstance(p1, GomokuPlayer)
        assert isinstance(p2, GomokuPlayer)
        p1.game = self.task.env
        p2.game = self.task.env
    else:
        p1 = ModuleDecidingPlayer(p1, self.task.env, temperature=self.temp)
        p2 = ModuleDecidingPlayer(p2, self.task.env, temperature=self.temp)

    p1.color = GomokuGame.BLACK
    p2.color = -p1.color
    self.player, self.opponent = p1, p2

    # accumulate the weighted outcomes and the weights themselves
    weightedTotal = 0.
    weightTotal = 0.
    for gameIndex in range(self.maxGames):
        weight = 1 / (10 * self.temp + 1)
        weightedTotal += weight * self._oneGame()
        weightTotal += weight
        # the first game is never followed by a temperature update
        if gameIndex > 0:
            self._globalWarming()

    return weightedTotal / weightTotal
def __call__(self, p1, p2):
    """ Play self.maxGames games between p1 and p2 and return the weighted
    average of the per-game results.

    When self.useNetworks is true, both arguments are wrapped in a
    ModuleDecidingPlayer on self.task.env, created with the starting
    temperature. Otherwise both must already be GomokuPlayer instances and
    are simply attached to self.task.env. p1 always gets GomokuGame.BLACK,
    p2 the negated color.

    Each game's outcome (self._oneGame()) is weighted by
    1 / (10 * self.temp + 1), so the weight shrinks as self.temp grows.
    self._globalWarming() is invoked after every game except the first
    (presumably it raises self.temp -- confirm in its definition).

    Raises AssertionError if networks are not used and a player is not a
    GomokuPlayer; raises ZeroDivisionError if self.maxGames is 0, because
    the weight sum then stays at zero. """
    # every evaluation restarts from the minimal temperature
    self.temp = self.minTemperature

    if not self.useNetworks:
        assert isinstance(p1, GomokuPlayer)
        assert isinstance(p2, GomokuPlayer)
        p1.game = self.task.env
        p2.game = self.task.env
    else:
        p1 = ModuleDecidingPlayer(p1, self.task.env, temperature=self.temp)
        p2 = ModuleDecidingPlayer(p2, self.task.env, temperature=self.temp)

    p1.color = GomokuGame.BLACK
    p2.color = -p1.color
    self.player, self.opponent = p1, p2

    # accumulate the weighted outcomes and the weights themselves
    weightedTotal = 0.
    weightTotal = 0.
    for gameIndex in range(self.maxGames):
        weight = 1 / (10 * self.temp + 1)
        weightedTotal += weight * self._oneGame()
        weightTotal += weight
        # the first game is never followed by a temperature update
        if gameIndex > 0:
            self._globalWarming()

    return weightedTotal / weightTotal
def __call__(self, x):
    """ Evaluate x against self.opponent and return the mean result over
    self.averageOverGames games.

    A Module is first wrapped in a ModuleDecidingPlayer (greedy selection)
    on self.env; a GomokuPlayer is used directly. Anything else raises
    NotImplementedError. Both the evaluated player and the opponent are
    attached to self.env, and before every game the player is given the
    negation of the opponent's current color.

    Raises ZeroDivisionError if self.averageOverGames is 0. """
    if isinstance(x, Module):
        player = ModuleDecidingPlayer(x, self.env, greedySelection=True)
    elif not isinstance(x, GomokuPlayer):
        raise NotImplementedError('Missing implementation for ' + x.__class__.__name__ + ' evaluation')
    else:
        player = x

    player.game = self.env
    self.opponent.game = self.env

    total = 0
    for _ in range(self.averageOverGames):
        # re-read the opponent's color each game, it may have changed
        player.color = -self.opponent.color
        total += EpisodicTask.__call__(self, player)

    return total / float(self.averageOverGames)
def __call__(self, x):
    """ Evaluate x against self.opponent and return the mean result over
    self.averageOverGames games.

    A Module is first wrapped in a ModuleDecidingPlayer (greedy selection)
    on self.env; a GomokuPlayer is used directly. Anything else raises
    NotImplementedError. Both the evaluated player and the opponent are
    attached to self.env, and before every game the player is given the
    negation of the opponent's current color.

    Raises ZeroDivisionError if self.averageOverGames is 0. """
    if isinstance(x, Module):
        player = ModuleDecidingPlayer(x, self.env, greedySelection=True)
    elif not isinstance(x, GomokuPlayer):
        raise NotImplementedError('Missing implementation for ' + x.__class__.__name__ + ' evaluation')
    else:
        player = x

    player.game = self.env
    self.opponent.game = self.env

    total = 0
    for _ in range(self.averageOverGames):
        # re-read the opponent's color each game, it may have changed
        player.color = -self.opponent.color
        total += EpisodicTask.__call__(self, player)

    return total / float(self.averageOverGames)