def f(self, player):
    """ Evaluate a player by repeatedly playing it (as white) against self.opponent
    (as black), adaptively adjusting the handicap, and return a score on the
    handicap scale. """
    if not isinstance(player, CapturePlayer):
        player = ModuleDecidingPlayer(player, self.env, greedySelection=True)
    player.color = CaptureGame.WHITE
    self.opponent.color = CaptureGame.BLACK
    self.reset()
    current = 0
    games = 0
    while games < self.maxGames and not self.stable(current):
        games += 1
        self.env.reset()
        self.env.giveHandicap(current, self.opponent.color)
        self.env.playToTheEnd(self.opponent, player)
        win = self.env.winner == player.color
        self.addResult(current, win, self.env.movesDone)
        # adjust the handicap level according to the results so far
        if self.goUp(current) and current < self.maxHandicaps:
            current += 1
        elif self.goDown(current) and current > 1:
            current -= 1
    high = self.bestHandicap()
    # the scale goes from -1 to (the highest handicap + 1)
    if not self.fluctuating():
        return high + self.winProp(high)
    else:
        return (high - 0.5) + (self.winProp(high) + self.winProp(high - 1)) / 2.
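# A minimal standalone sketch (not part of the original class) of how the
# handicap-adjustment criteria used above might be defined: move up a level
# when the recent win proportion at the current handicap is high, move down
# when it is low. The function names and thresholds here are assumptions for
# illustration only; the real goUp()/goDown() are defined elsewhere in the task.

def sketch_go_up(win_proportion, threshold=0.8):
    """Hypothetical criterion: raise the handicap once the player dominates at this level."""
    return win_proportion > threshold

def sketch_go_down(win_proportion, threshold=0.2):
    """Hypothetical criterion: lower the handicap once the player is dominated at this level."""
    return win_proportion < threshold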
def f(self, x):
    """ If a module is given, wrap it into a ModuleDecidingPlayer before evaluating it.
    Also, if applicable, average the result over multiple games. """
    if isinstance(x, Module):
        agent = ModuleDecidingPlayer(x, self.env, greedySelection=True)
    elif isinstance(x, CapturePlayer):
        agent = x
    else:
        raise NotImplementedError('Missing implementation for '
                                  + x.__class__.__name__ + ' evaluation')
    res = 0
    agent.game = self.env
    self.opponent.game = self.env
    for _ in range(self.averageOverGames):
        # the evaluated agent always takes the color opposite to the opponent's
        agent.color = -self.opponent.color
        res += EpisodicTask.f(self, agent)
    return res / float(self.averageOverGames)
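# Hedged usage sketch for the averaging evaluator above. The task class name
# (CaptureGameTask), its constructor arguments and the import paths below are
# assumptions modeled on PyBrain's capture-game examples; adjust them to the
# actual class this method belongs to. The point is only that f() accepts
# either a bare Module (which gets wrapped) or a ready-made CapturePlayer.
from pybrain.rl.environments.twoplayergames import CaptureGameTask
from pybrain.rl.environments.twoplayergames.capturegameplayers import KillingPlayer, RandomCapturePlayer
from pybrain.tools.shortcuts import buildNetwork
from pybrain import SigmoidLayer

task = CaptureGameTask(5, averageOverGames=10, opponent=KillingPlayer)
net = buildNetwork(task.outdim, task.indim, outclass=SigmoidLayer)
print(task.f(net))                              # Module: wrapped before playing
print(task.f(RandomCapturePlayer(task.env)))    # CapturePlayer: used directly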
def __call__(self, p1, p2):
    """ Play a series of games between p1 and p2 and return a weighted score,
    where games played at lower temperatures (less noise) carry more weight. """
    self.temp = self.minTemperature
    if self.useNetworks:
        p1 = ModuleDecidingPlayer(p1, self.task.env, temperature=self.temp)
        p2 = ModuleDecidingPlayer(p2, self.task.env, temperature=self.temp)
    else:
        assert isinstance(p1, CapturePlayer)
        assert isinstance(p2, CapturePlayer)
        p1.game = self.task.env
        p2.game = self.task.env
    p1.color = CaptureGame.BLACK
    p2.color = -p1.color
    self.player = p1
    self.opponent = p2

    # the games with increasing temperatures and lower coefficients
    coeffSum = 0.
    score = 0.
    np = int(self.cases * (1 - self.presetGamesProportion))
    for i in range(self.maxGames):
        coeff = 1 / (10 * self.temp + 1)
        preset = None
        if self.cases > 1:
            if i % self.cases >= np:
                preset = self.sPos[(i - np) % self.cases]
            elif i < self.cases:
                # greedy, no need to repeat, just increase the coefficient
                if i == 0:
                    coeff *= np
                else:
                    continue
        res = self._oneGame(preset)
        score += coeff * res
        coeffSum += coeff
        if self.cases == 1 or (i % self.cases == 0 and i > 0):
            self._globalWarming()
    return score / coeffSum
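# Standalone illustration (not from the original class) of the weighting used
# in the loop above: as _globalWarming() raises the softmax temperature, the
# per-game coefficient 1 / (10 * temp + 1) shrinks, so near-greedy games at
# low temperature dominate the final score. The temperature values below are
# arbitrary, for illustration only.
for temp in [0.0, 0.02, 0.1, 0.2, 0.5]:
    print('temp = %.2f  ->  coeff = %.3f' % (temp, 1. / (10 * temp + 1)))
# temp = 0.00  ->  coeff = 1.000
# temp = 0.10  ->  coeff = 0.500
# temp = 0.50  ->  coeff = 0.167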
__author__ = 'Tom Schaul, [email protected]'

from pybrain.rl.environments.twoplayergames import CaptureGame
from pybrain.rl.environments.twoplayergames.capturegameplayers import RandomCapturePlayer, KillingPlayer, ModuleDecidingPlayer
from pybrain.rl.environments.twoplayergames.capturegameplayers.clientwrapper import ClientCapturePlayer
from pybrain.rl.experiments.tournament import Tournament
from pybrain.tools.shortcuts import buildNetwork
from pybrain import SigmoidLayer

game = CaptureGame(5)
randAgent = RandomCapturePlayer(game, name='rand')
killAgent = KillingPlayer(game, name='kill')

# the network's outputs are probabilities of choosing the action, thus a sigmoid output layer
net = buildNetwork(game.outdim, game.indim, outclass=SigmoidLayer)
netAgent = ModuleDecidingPlayer(net, game, name='net')
# same network, but greedy decisions:
netAgentGreedy = ModuleDecidingPlayer(net, game, name='greedy', greedySelection=True)

agents = [randAgent, killAgent, netAgent, netAgentGreedy]

# optionally add a Java client player, if a server is available
try:
    javaAgent = ClientCapturePlayer(game, name='java')
    agents.append(javaAgent)
except Exception:
    print('No Java server available.')

print()
print('Starting tournament...')
tourn = Tournament(game, agents)
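# Hedged continuation of the script above: PyBrain's Tournament class offers an
# organize() method for playing round-robin games between all registered agents.
# The number of rounds used here and printing the tournament object for a
# results summary are assumptions based on the usual PyBrain tournament
# examples; check the Tournament API before relying on them.
tourn.organize(50)
print(tourn)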