Beispiel #1
0
    def f(self, player):
        """ Rate a player on a handicap scale by playing it against self.opponent.

        Anything that is not a CapturePlayer is wrapped into a greedy
        ModuleDecidingPlayer first. The player takes white, the opponent takes
        black, and the handicap is given to the opponent's color. Games are
        played until self.maxGames is reached or self.stable() holds for the
        current handicap; after each game the handicap is moved according to
        self.goUp() / self.goDown().

        :return: self.bestHandicap() corrected by the observed win proportions. """
        if not isinstance(player, CapturePlayer):
            player = ModuleDecidingPlayer(player, self.env, greedySelection=True)
        player.color = CaptureGame.WHITE
        self.opponent.color = CaptureGame.BLACK
        self.reset()

        handicap = 0
        played = 0
        while True:
            # stop on the game budget first, so stable() is not consulted needlessly
            if played >= self.maxGames or self.stable(handicap):
                break
            played += 1

            # one complete game at the current handicap level
            self.env.reset()
            self.env.giveHandicap(handicap, self.opponent.color)
            self.env.playToTheEnd(self.opponent, player)
            playerWon = self.env.winner == player.color
            self.addResult(handicap, playerWon, self.env.movesDone)

            # adapt the handicap for the next game
            if self.goUp(handicap) and handicap < self.maxHandicaps:
                handicap += 1
            elif self.goDown(handicap) and handicap > 1:
                handicap -= 1

        best = self.bestHandicap()
        # the scale goes from -1 to (the highest handicap + 1)
        if self.fluctuating():
            # average the win proportions of the two neighbouring levels
            return (best - 0.5) + (self.winProp(best) + self.winProp(best - 1)) / 2.
        return best + self.winProp(best)
Beispiel #2
0
    def f(self, player):
        """ Play handicap games against self.opponent and return a fractional rating.

        A player that is not already a CapturePlayer gets wrapped into a
        ModuleDecidingPlayer with greedy selection. The evaluated player is
        white, the opponent is black and is the color passed to giveHandicap().
        The loop ends when self.maxGames games were played or self.stable()
        is true for the current handicap.

        :return: a value derived from self.bestHandicap() and self.winProp(). """
        if not isinstance(player, CapturePlayer):
            player = ModuleDecidingPlayer(
                player, self.env, greedySelection=True)
        player.color = CaptureGame.WHITE
        self.opponent.color = CaptureGame.BLACK
        self.reset()

        level = 0
        gamesPlayed = 0
        while not (gamesPlayed >= self.maxGames or self.stable(level)):
            gamesPlayed += 1

            env = self.env
            env.reset()
            env.giveHandicap(level, self.opponent.color)
            env.playToTheEnd(self.opponent, player)
            won = env.winner == player.color
            self.addResult(level, won, env.movesDone)

            # move one level up or down, staying within the allowed range
            if self.goUp(level) and level < self.maxHandicaps:
                level += 1
            elif self.goDown(level) and level > 1:
                level -= 1

        top = self.bestHandicap()
        # the scale goes from -1 to (the highest handicap + 1)
        if not self.fluctuating():
            rating = top + self.winProp(top)
        else:
            # midpoint between the two levels plus their mean win proportion
            rating = (top - 0.5) + (self.winProp(top) + self.winProp(top - 1)) / 2.
        return rating
Beispiel #3
0
 def f(self, x):
     """ Evaluate x, averaging EpisodicTask.f over self.averageOverGames games.

     A Module is wrapped into a greedy ModuleDecidingPlayer, a CapturePlayer
     is used directly, and any other type raises NotImplementedError. """
     if not isinstance(x, (Module, CapturePlayer)):
         raise NotImplementedError('Missing implementation for '+x.__class__.__name__+' evaluation')
     # the Module check takes precedence, as a Module always gets wrapped
     player = ModuleDecidingPlayer(x, self.env, greedySelection = True) if isinstance(x, Module) else x

     # both sides play on the task's environment
     player.game = self.env
     self.opponent.game = self.env

     total = 0
     for _ in range(self.averageOverGames):
         # the evaluated player always takes the color opposite to the opponent
         player.color = -self.opponent.color
         total += EpisodicTask.f(self, player)
     return total / float(self.averageOverGames)
 def f(self, x):
     """ Return the mean result of EpisodicTask.f over self.averageOverGames games.

     x may be a Module (wrapped into a greedy ModuleDecidingPlayer) or a
     CapturePlayer (used as is); other types raise NotImplementedError. """
     if isinstance(x, Module):
         contender = ModuleDecidingPlayer(x, self.env, greedySelection = True)
     elif not isinstance(x, CapturePlayer):
         raise NotImplementedError('Missing implementation for '+x.__class__.__name__+' evaluation')
     else:
         contender = x

     # attach both players to the environment of this task
     contender.game = self.env
     self.opponent.game = self.env

     accumulated = 0
     numGames = self.averageOverGames
     for _ in range(numGames):
         # play with the color opposite to the opponent's
         contender.color = -self.opponent.color
         outcome = EpisodicTask.f(self, contender)
         accumulated += outcome
     return accumulated / float(self.averageOverGames)
Beispiel #5
0
    def __call__(self, p1, p2):
        """ Let p1 (black) play p2 and return the coefficient-weighted mean result.

        With self.useNetworks set, both arguments are wrapped into
        ModuleDecidingPlayers on the task's environment, using the starting
        temperature; otherwise both must be CapturePlayers and are attached to
        that environment. Up to self.maxGames games are played through
        self._oneGame(), each weighted by 1 / (10 * self.temp + 1).

        :return: sum of weighted results divided by the sum of the weights. """
        self.temp = self.minTemperature
        if not self.useNetworks:
            assert isinstance(p1, CapturePlayer)
            assert isinstance(p2, CapturePlayer)
            p1.game = self.task.env
            p2.game = self.task.env
        else:
            p1 = ModuleDecidingPlayer(p1, self.task.env, temperature=self.temp)
            p2 = ModuleDecidingPlayer(p2, self.task.env, temperature=self.temp)
        p1.color = CaptureGame.BLACK
        p2.color = -p1.color
        self.player = p1
        self.opponent = p2

        # the games with increasing temperatures and lower coefficients
        weightSum = 0.
        weightedScore = 0.
        # number of cases per round that are played without a preset position
        nonPreset = int(self.cases * (1 - self.presetGamesProportion))
        for gameIdx in range(self.maxGames):
            weight = 1 / (10 * self.temp + 1)
            startPos = None
            if self.cases > 1:
                if gameIdx % self.cases >= nonPreset:
                    startPos = self.sPos[(gameIdx - nonPreset) % self.cases]
                elif gameIdx < self.cases:
                    # greedy, no need to repeat, just increase the coefficient
                    if gameIdx != 0:
                        continue
                    weight *= nonPreset
            outcome = self._oneGame(startPos)
            weightedScore += weight * outcome
            weightSum += weight
            # presumably raises self.temp for the next round -- TODO confirm
            if self.cases == 1 or (gameIdx % self.cases == 0 and gameIdx > 0):
                self._globalWarming()

        return weightedScore / weightSum
Beispiel #6
0
    def __call__(self, p1, p2):
        """ Compare two players over a series of games; return the weighted average.

        p1 plays black and p2 the opposite color. If self.useNetworks is set,
        both are wrapped into ModuleDecidingPlayers at the current temperature;
        otherwise they are asserted to be CapturePlayers and bound to the
        task's environment. Each result of self._oneGame() is weighted with
        1 / (10 * self.temp + 1).

        :return: the weighted score divided by the total weight. """
        self.temp = self.minTemperature
        env = self.task.env
        if self.useNetworks:
            p1 = ModuleDecidingPlayer(p1, env, temperature=self.temp)
            p2 = ModuleDecidingPlayer(p2, env, temperature=self.temp)
        else:
            assert isinstance(p1, CapturePlayer)
            assert isinstance(p2, CapturePlayer)
            p1.game = env
            p2.game = env
        p1.color = CaptureGame.BLACK
        p2.color = -p1.color
        self.player, self.opponent = p1, p2

        # the games with increasing temperatures and lower coefficients
        totalWeight = 0.
        total = 0.
        numFree = int(self.cases * (1 - self.presetGamesProportion))
        for i in range(self.maxGames):
            factor = 1 / (10 * self.temp + 1)
            position = None
            if self.cases > 1:
                slot = i % self.cases
                if slot >= numFree:
                    # this slot of the round starts from a preset position
                    position = self.sPos[(i - numFree) % self.cases]
                elif i < self.cases and i == 0:
                    # greedy, no need to repeat, just increase the coefficient
                    factor *= numFree
                elif i < self.cases:
                    continue
            result = self._oneGame(position)
            total += factor * result
            totalWeight += factor
            if self.cases == 1 or (i % self.cases == 0 and i > 0):
                self._globalWarming()

        return total / totalWeight
Beispiel #7
0
__author__ = 'Tom Schaul, [email protected]'

from pybrain.rl.environments.twoplayergames import CaptureGame
from pybrain.rl.environments.twoplayergames.capturegameplayers import RandomCapturePlayer, KillingPlayer, ModuleDecidingPlayer
from pybrain.rl.environments.twoplayergames.capturegameplayers.clientwrapper import ClientCapturePlayer
from pybrain.rl.experiments.tournament import Tournament
from pybrain.tools.shortcuts import buildNetwork
from pybrain import SigmoidLayer

# one game instance shared by all agents (5 is presumably the board size -- TODO confirm)
game = CaptureGame(5)
randAgent = RandomCapturePlayer(game, name='rand')
killAgent = KillingPlayer(game, name='kill')

# the network's outputs are probabilities of choosing the action, thus a sigmoid output layer
net = buildNetwork(game.outdim, game.indim, outclass=SigmoidLayer)
netAgent = ModuleDecidingPlayer(net, game, name='net')

# same network, but greedy decisions:
netAgentGreedy = ModuleDecidingPlayer(net, game, name='greedy', greedySelection=True)

agents = [randAgent, killAgent, netAgent, netAgentGreedy]

# The Java-backed agent is optional: join the tournament only if it can be created.
# Catch Exception rather than using a bare except, so that Ctrl-C (KeyboardInterrupt)
# and SystemExit still terminate the script instead of being reported as a missing server.
try:
    javaAgent = ClientCapturePlayer(game, name='java')
except Exception:
    print('No Java server available.')
else:
    agents.append(javaAgent)

print()
print('Starting tournament...')
tourn = Tournament(game, agents)