class BoydAEGSS(Strategy):
    """AE-GS-S strategy: nudges lambda after each round based on the
    previous (my move, opponent move) pair, then plays whatever
    Strategy.NashEquilibrium yields for the current lambda."""

    def __init__(self, game):
        Strategy.__init__(self)
        # Lambda starts at 0.0.
        self._lambda = Lambda(0.0)

    def respond(self, game):
        if self.getRoundsPlayed() == 0:
            # First round: no history yet, leave lambda untouched.
            self._lambda.nochange()
        else:
            pair = self.getLastResponsePair()
            mine, theirs = pair[0], pair[1]
            # Transition table for the lambda update, keyed by last moves.
            update = {
                ('C', 'C'): self._lambda.nochange,
                ('C', 'D'): self._lambda.incrementValue,
                ('D', 'C'): self._lambda.decrementValue,
                ('D', 'D'): self._lambda.incrementValue,
            }.get((mine, theirs))
            if update is not None:
                update()
        return Strategy.NashEquilibrium(self._lambda)

    def name(self):
        return "AE-GS-S from Boyd's Tournament"

    def author(self):
        return "In-house (Georgios Sakellariou)"
def __init__(self, game):
    """Set up per-instance bookkeeping and (re)initialise the shared Q-table."""
    Strategy.__init__(self)
    # -999 is the sentinel for "no state/action recorded yet".
    self.__LastState = self.__LastAction = -999
    self.__CurrentState = self.__CurrentAction = -999
    # Lambda starts at the assumed opponent cooperation ratio.
    self._lambda = Lambda(BoydRLQTableI.__CoopRatio)
    self.initialQ(game)  # initialise the Q-Table with the formulas
    BoydRLQTableI.__Counter = 0
    self.__ExplorePercentage = 5.0  # for epsilon-greedy
    BoydRLQTableI.__LastLambda = BoydRLQTableI.__CoopRatio
class BoydABitNicer(Strategy):
    """Tracks how often each side has cooperated and steers lambda so that
    the two running counts stay within a fixed band of each other."""

    def __init__(self, game):
        Strategy.__init__(self)
        self._lambda = Lambda(0.1)
        self.myCtr = 0   # how many times we have cooperated so far
        self.oppCtr = 0  # how many times the opponent has cooperated so far

    def respond(self, game):
        if self.getRoundsPlayed() == 0:
            # No history yet: leave lambda alone.
            self._lambda.nochange()
        else:
            pair = self.getLastResponsePair()
            if pair[0] == 'C':
                self.myCtr += 1
            if pair[1] == 'C':
                self.oppCtr += 1
            # Adjust lambda according to the cooperation-count gap:
            # raise it while the gap is under 10, lower it past 20,
            # and hold steady inside the [10, 20] band.
            gap = self.myCtr - self.oppCtr
            if gap < 10:
                self._lambda.incrementValue()
            elif gap > 20:
                self._lambda.decrementValue()
            else:
                self._lambda.nochange()
        return Strategy.NashEquilibrium(self._lambda)

    def name(self):
        return "A Bit Nicer from Boyd's Tournament"

    def author(self):
        return "Xiuyi Fan"
def Calculate(cmd, x):
    """Evaluate one calculator command *cmd* against the current value *x*.

    Returns the new current value: constants replace x, unary operations
    apply to x, binary operations parse their second operand from the tail
    of cmd.  Unrecognised input prints a message and leaves x unchanged.
    'exit' maps to None, which is returned to the caller as a sentinel.
    """
    # Command table.  NOTE: insertion order matters in the dispatch loop
    # below — 'pi' must be tried before 'p' and 'sqrt' before 'sqr',
    # because matching uses cmd.startswith() over the keys in order.
    a = {
        'exit': None,
        'pi': math.pi,
        'sqrt': lambda x: math.sqrt(x),
        'sqr': lambda x: x ** 2,
        'p': lambda x, y: x ** y,
        '+': lambda x, y: x + y,
        '-': lambda x, y: x - y,
        '*': lambda x, y: x * y,
        '/': lambda x, y: x / y
    }
    if cmd == 'help':
        # List every known command name, then keep the current value.
        for action in iter(a):
            print(action)
        return x
    for action in iter(a):
        if cmd.startswith(action):
            if not Lambda.isLambda(a[action]):
                # Plain constant (or None for 'exit'): return it directly.
                return a[action]
            elif Lambda.args(a[action]) == ['x']:
                # Unary operation on the current value.
                return a[action](x)
            elif Lambda.args(a[action]) == ['x','y']:
                # Binary operation: the second operand is the rest of cmd.
                # NOTE(review): float() can raise ValueError on input like
                # 'p?' and '/' can raise ZeroDivisionError — presumably the
                # caller tolerates these; confirm before hardening.
                y = float(cmd[len(action):])
                return a[action](x,y)
    if Numbers.IsFloat(cmd):
        # Bare number: it becomes the new current value.
        return float(cmd)
    print(cmd, "unrecognized.")
    return x
class BoydABitRandom(Strategy):
    """AE-GS-S lambda adjustment, but with a small random chance of playing
    the opposite of the equilibrium move each round."""

    def __init__(self, game):
        Strategy.__init__(self)
        self._lambda = Lambda(0.1)

    def respond(self, game):
        if self.getRoundsPlayed() == 0:
            self._lambda.nochange()
        else:
            pair = self.getLastResponsePair()
            mine, theirs = pair[0], pair[1]
            # Same lambda-update rule as AE-GS-S, expressed as a table.
            update = {
                ('C', 'C'): self._lambda.nochange,
                ('C', 'D'): self._lambda.incrementValue,
                ('D', 'C'): self._lambda.decrementValue,
                ('D', 'D'): self._lambda.incrementValue,
            }.get((mine, theirs))
            if update is not None:
                update()
        choice = Strategy.NashEquilibrium(self._lambda)
        # Flip the equilibrium move with probability 4/19 (randint bounds
        # are inclusive, so 4 of the 19 outcomes are below 5).
        if choice == 'D':
            if random.randint(1, 19) < 5:
                choice = 'C'
        elif choice == 'C':
            if random.randint(1, 19) < 5:
                choice = 'D'
        return choice

    def name(self):
        return "A Bit Random from Boyd's Tournament"

    def author(self):
        return "Xiuyi Fan"
class BoydSocialTitForTat(Strategy):
    """Tit-for-tat that also nudges a lambda value in the same direction
    as the move it mirrors."""

    def __init__(self, game):
        Strategy.__init__(self)
        # Random initialisation for the lambda, uniform in [0, 1).
        self._lambda = Lambda(random.random())

    def respond(self, game):
        if self.getRoundsPlayed() == 0:
            # Open with cooperation; lambda stays where it started.
            self._lambda.nochange()
            return 'C'
        # From the second round: mirror the opponent's previous move and
        # move lambda the same way.
        if self.getLastResponsePair()[1] == 'C':
            self._lambda.incrementValue()
            return 'C'
        self._lambda.decrementValue()
        return 'D'

    def name(self):
        return "Social Tit For Tat from Boyd's Tournament"

    def author(self):
        return "In-house (Theodore Boyd)"
def __init__(self, game):
    """Initialise lambda and the two cooperation counters."""
    Strategy.__init__(self)
    self._lambda = Lambda(0.1)
    # Cooperation tallies: ours first, then the opponent's.
    self.myCtr = 0
    self.oppCtr = 0
class BoydRLQTableI(Strategy):
    """Q-learning strategy over 6 lambda buckets with two actions (C, D)."""

    # Q-table shared at class level: rows are lambda buckets (6 states),
    # columns are the 2 actions, C and D.
    # NOTE(review): initialQ() appends 6 new rows on every construction,
    # but the /0.2 bucket index only ever reaches rows 0-5 — so a second
    # instance reuses (and keeps training) the first instance's rows while
    # its own freshly initialised rows are never read.  Confirm whether
    # this cross-instance sharing is intentional.
    __QTable = []  # actions 2 - C and D, states, only need 6 states
    __Counter = 0          # rounds played across the class, drives epsilon decay
    __LastLambda = 0.0     # lambda value used when computing the last reward
    __CoopRatio = 0.3  # Poc, the probability of the opponent to cooperate

    def __init__(self, game):
        """Reset per-instance state and (re)initialise the shared Q-table."""
        Strategy.__init__(self)
        # -999 marks "no state/action recorded yet".
        self.__LastState = -999
        self.__LastAction = -999
        self.__CurrentState = -999
        self.__CurrentAction = -999
        self._lambda = Lambda(BoydRLQTableI.__CoopRatio)
        self.initialQ(game)  # initialise the Q-Table with the formulas
        BoydRLQTableI.__Counter = 0
        self.__ExplorePercentage = 5.0  # for epsilon-greedy
        BoydRLQTableI.__LastLambda = BoydRLQTableI.__CoopRatio

    def respond(self, game):
        """Update lambda from the last round, then pick a move via Q-learning."""
        BoydRLQTableI.__Counter += 1
        if self.getRoundsPlayed() == 0:  # for the first round
            self._lambda.nochange()
            # Seed the learning state: state is lambda rounded to one
            # decimal, first action is taken to be C (index 0).
            self.__LastState = round(self._lambda.getValue(), 1)
            self.__LastAction = 0
            BoydRLQTableI.__LastLambda = round(self._lambda.getValue(), 1)
            return 'C'
        else:
            OppLast = self.getLastResponsePair()[1]
            MyLast = self.getLastResponsePair()[0]
            # parameter updating — note mutual cooperation DECREMENTS
            # lambda here (unlike AE-GS-S, which leaves it unchanged).
            if MyLast == 'C' and OppLast == 'C':
                self._lambda.decrementValue()
            elif MyLast == 'C' and OppLast == 'D':
                self._lambda.incrementValue()
            elif MyLast == 'D' and OppLast == 'C':
                self._lambda.decrementValue()
            elif MyLast == 'D' and OppLast == 'D':
                self._lambda.incrementValue()
            # decision making with RL Q-Table
            self.__CurrentState = round(self._lambda.getValue(), 1)
            FinalDecision = self.learningResult(OppLast, game)
            BoydRLQTableI.__LastLambda = round(self._lambda.getValue(), 1)
            return FinalDecision

    def learningResult(self, OppLastAction, game):
        """Apply one Q-value update and return the chosen move ('C' or 'D').

        Must run after __CurrentState is set and before __LastState /
        __LastAction are advanced — getReward reads the OLD __LastAction.
        """
        # get reward from the payoff for the last round
        Reward = self.getReward(OppLastAction, game)
        self.__CurrentAction = self.getBestAction(self.__CurrentState, game)
        # right version of the Q-Value updating formula:
        # Q(s,a) += lr * (r + disF * Q(s',a') - Q(s,a)).
        # State index = lambda / 0.2 truncated, mapping 0.0..1.0 to rows 0..5.
        BoydRLQTableI.__QTable[int(self.__LastState/0.2)][self.__LastAction] += game.RL_lR * \
            (Reward + game.RL_disF * BoydRLQTableI.__QTable[int(self.__CurrentState/0.2)][self.__CurrentAction] -
             BoydRLQTableI.__QTable[int(self.__LastState/0.2)][self.__LastAction])
        self.__LastState = self.__CurrentState
        self.__LastAction = self.__CurrentAction
        # Action index 0 means cooperate, 1 means defect.
        if self.__CurrentAction == 0:
            return 'C'
        else:
            return 'D'

    def getBestAction(self, state, game):
        """Epsilon-greedy action pick: epsilon decays linearly from 5% to 0
        over game.Iter_N rounds; ties favour cooperation (action 0)."""
        self.__ExplorePercentage = -(5.0 / game.Iter_N) * BoydRLQTableI.__Counter + 5
        if random.randint(0, 99) < self.__ExplorePercentage:
            # Explore: uniformly random action.
            return random.randint(0, 1)
        else:
            # Exploit: best Q-value for this state's bucket.
            if BoydRLQTableI.__QTable[int(state / 0.2)][0] >= BoydRLQTableI.__QTable[int(state / 0.2)][1]:
                return 0
            else:
                return 1

    def getReward(self, OppLast, GAME):
        """Reward for the PREVIOUS action: the lambda-weighted mix of the
        two games' payoffs for (my last move, opponent's last move)."""
        valLambda = float(BoydRLQTableI.__LastLambda)
        if self.__LastAction == 0:
            my = 'C'
        else:
            my = 'D'
        return GAME.Games[0].get((my, OppLast))[0] * (1.0 - valLambda) + \
            GAME.Games[1].get((my, OppLast))[0] * valLambda

    def initialQ(self, game):
        """Seed the Q-table: for each lambda bucket, the expected payoff of
        C (column 0) and D (column 1) assuming the opponent cooperates
        with probability __CoopRatio."""
        for i in range(0, 6):
            lambda1 = round(i * 0.2, 1)
            # Expected payoff of cooperating in this bucket.
            temp1 = float(BoydRLQTableI.__CoopRatio *
                          (game.Games[0].get(('C', 'C'))[0] * (1.0 - lambda1) +
                           game.Games[1].get(('C', 'C'))[0] * lambda1) +
                          (1.0 - BoydRLQTableI.__CoopRatio) *
                          (game.Games[0].get(('C', 'D'))[0] * (1.0 - lambda1) +
                           game.Games[1].get(('C', 'D'))[0] * lambda1))
            # Expected payoff of defecting in this bucket.
            temp2 = float(BoydRLQTableI.__CoopRatio *
                          (game.Games[0].get(('D', 'C'))[0] * (1.0 - lambda1) +
                           game.Games[1].get(('D', 'C'))[0] * lambda1) +
                          (1.0 - BoydRLQTableI.__CoopRatio) *
                          (game.Games[0].get(('D', 'D'))[0] * (1.0 - lambda1) +
                           game.Games[1].get(('D', 'D'))[0] * lambda1))
            BoydRLQTableI.__QTable.append([temp1, temp2])

    def name(self):
        return "RL QTable I from Boyd's Tournament"

    def author(self):
        return "Alex Gao"
def __init__(self, game):
    """Pick the strategy's tuning constants from the global lambda.

    game.Lambda_global, scaled by 10 and truncated, selects one of six
    hand-tuned parameter sets; any other value of gammaSwitch leaves the
    class-level defaults untouched.
    """
    Strategy.__init__(self)
    # -999 is the sentinel for "no state/action recorded yet".
    self.__LastState = self.__LastAction = -999
    self.__CurrentState = self.__CurrentAction = -999
    BoydRLQTableIPlus.__gamma = game.Lambda_global
    gammaSwitch = int(BoydRLQTableIPlus.__gamma * 10)
    # Presets keyed by gammaSwitch; each tuple is
    # (CoopRatio, learningRate, discountFactor,
    #  ccLambdaA, ccLambdaB, cdLambda, dcLambda, ddLambda).
    presets = {
        0:  (0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 10.0, 10.0),
        2:  (0.0, 0.5, 0.0, 4.0, 0.0, 0.0, 10.0, 10.0),
        4:  (0.5, 1.0, 0.5, 10.0, 4.0, 0.0, 0.0, 10.0),
        6:  (0.5, 0.5, 0.5, 2.0, 2.0, 4.0, 0.0, 0.0),
        8:  (1.0, 1.0, 1.0, 10.0, 4.0, 10.0, 0.0, 0.0),
        10: (1.0, 1.0, 0.0, 10.0, 8.0, 0.0, 0.0, 4.0),
    }
    if gammaSwitch in presets:
        (BoydRLQTableIPlus.__CoopRatio,
         BoydRLQTableIPlus.__learningRate,
         BoydRLQTableIPlus.__discountFactor,
         BoydRLQTableIPlus.__ccLambdaA,
         BoydRLQTableIPlus.__ccLambdaB,
         BoydRLQTableIPlus.__cdLambda,
         BoydRLQTableIPlus.__dcLambda,
         BoydRLQTableIPlus.__ddLambda) = presets[gammaSwitch]
    self._lambda = Lambda(BoydRLQTableIPlus.__CoopRatio)
    self.initialQ(game)  # initialise the Qtable
    BoydRLQTableIPlus.__Counter = 0
    self.__ExplorePercentage = 0.0  # for epsilon-greedy, epsilon = 0
    BoydRLQTableIPlus.__LastLambda = BoydRLQTableIPlus.__CoopRatio
class BoydRLQTableIPlus(Strategy):
    """Variant of RL QTable I whose rewards come from hand-tuned lambda
    coefficients (selected per game.Lambda_global) instead of game payoffs."""

    # Q-table shared at class level: rows are lambda buckets (6 states),
    # columns are the 2 actions, C and D.
    # NOTE(review): initialQ() appends rows on every construction but the
    # /0.2 index only reaches rows 0-5 — later instances reuse the first
    # instance's trained rows.  Confirm whether that sharing is intended.
    __QTable = []  # actions 2 - C and D, states, only need 6 states
    __Counter = 0          # rounds played, drives epsilon decay
    __LastLambda = 0.0     # lambda used when computing the last reward
    __CoopRatio = 0.3  # Poc, the probability of the opponent to cooperate
    # Defaults below apply only when gammaSwitch matches none of the
    # hard-coded cases in __init__.
    __learningRate = 0.1
    __discountFactor = 0.9
    # Reward coefficients per (my move, opp move) outcome.
    __ccLambdaA = 3
    __ccLambdaB = 2.5
    __cdLambda = 2.5
    __dcLambda = 5
    __ddLambda = 1
    __gamma = 0.5          # mirror of game.Lambda_global

    def __init__(self, game):
        """Select tuning constants from game.Lambda_global, then reset state."""
        Strategy.__init__(self)
        # -999 marks "no state/action recorded yet".
        self.__LastState = -999
        self.__LastAction = -999
        self.__CurrentState = -999
        self.__CurrentAction = -999
        BoydRLQTableIPlus.__gamma = game.Lambda_global
        # gammaSwitch = Lambda_global scaled to 0..10; only the even values
        # below have presets — anything else keeps the class defaults.
        gammaSwitch = int(BoydRLQTableIPlus.__gamma * 10)
        if gammaSwitch == 0:
            BoydRLQTableIPlus.__CoopRatio = 0.0
            BoydRLQTableIPlus.__learningRate = 0.0
            BoydRLQTableIPlus.__discountFactor = 0.0
            BoydRLQTableIPlus.__ccLambdaA = 0.0
            BoydRLQTableIPlus.__ccLambdaB = 4.0
            BoydRLQTableIPlus.__cdLambda = 4.0
            BoydRLQTableIPlus.__dcLambda = 10.0
            BoydRLQTableIPlus.__ddLambda = 10.0
        elif gammaSwitch == 2:
            BoydRLQTableIPlus.__CoopRatio = 0.0
            BoydRLQTableIPlus.__learningRate = 0.5
            BoydRLQTableIPlus.__discountFactor = 0.0
            BoydRLQTableIPlus.__ccLambdaA = 4.0
            BoydRLQTableIPlus.__ccLambdaB = 0.0
            BoydRLQTableIPlus.__cdLambda = 0.0
            BoydRLQTableIPlus.__dcLambda = 10.0
            BoydRLQTableIPlus.__ddLambda = 10.0
        elif gammaSwitch == 4:
            BoydRLQTableIPlus.__CoopRatio = 0.5
            BoydRLQTableIPlus.__learningRate = 1.0
            BoydRLQTableIPlus.__discountFactor = 0.5
            BoydRLQTableIPlus.__ccLambdaA = 10.0
            BoydRLQTableIPlus.__ccLambdaB = 4.0
            BoydRLQTableIPlus.__cdLambda = 0.0
            BoydRLQTableIPlus.__dcLambda = 0.0
            BoydRLQTableIPlus.__ddLambda = 10.0
        elif gammaSwitch == 6:
            BoydRLQTableIPlus.__CoopRatio = 0.5
            BoydRLQTableIPlus.__learningRate = 0.5
            BoydRLQTableIPlus.__discountFactor = 0.5
            BoydRLQTableIPlus.__ccLambdaA = 2.0
            BoydRLQTableIPlus.__ccLambdaB = 2.0
            BoydRLQTableIPlus.__cdLambda = 4.0
            BoydRLQTableIPlus.__dcLambda = 0.0
            BoydRLQTableIPlus.__ddLambda = 0.0
        elif gammaSwitch == 8:
            BoydRLQTableIPlus.__CoopRatio = 1.0
            BoydRLQTableIPlus.__learningRate = 1.0
            BoydRLQTableIPlus.__discountFactor = 1.0
            BoydRLQTableIPlus.__ccLambdaA = 10.0
            BoydRLQTableIPlus.__ccLambdaB = 4.0
            BoydRLQTableIPlus.__cdLambda = 10.0
            BoydRLQTableIPlus.__dcLambda = 0.0
            BoydRLQTableIPlus.__ddLambda = 0.0
        elif gammaSwitch == 10:
            BoydRLQTableIPlus.__CoopRatio = 1.0
            BoydRLQTableIPlus.__learningRate = 1.0
            BoydRLQTableIPlus.__discountFactor = 0.0
            BoydRLQTableIPlus.__ccLambdaA = 10.0
            BoydRLQTableIPlus.__ccLambdaB = 8.0
            BoydRLQTableIPlus.__cdLambda = 0.0
            BoydRLQTableIPlus.__dcLambda = 0.0
            BoydRLQTableIPlus.__ddLambda = 4.0
        self._lambda = Lambda(BoydRLQTableIPlus.__CoopRatio)
        self.initialQ(game)  # initialise the Qtable
        BoydRLQTableIPlus.__Counter = 0
        # NOTE(review): the "epsilon = 0" comment below contradicts
        # getBestAction, which overwrites this with a schedule decaying
        # from 5 — confirm which is intended.
        self.__ExplorePercentage = 0.0  # for epsilon-greedy, epsilon = 0
        BoydRLQTableIPlus.__LastLambda = BoydRLQTableIPlus.__CoopRatio

    def respond(self, game):
        """Update lambda from the last round, then pick a move via Q-learning."""
        BoydRLQTableIPlus.__Counter += 1
        if self.getRoundsPlayed() == 0:  # for the first round
            self._lambda.nochange()
            # Seed the learning state: state is lambda rounded to one
            # decimal, first action is taken to be C (index 0).
            self.__LastState = round(self._lambda.getValue(), 1)
            self.__LastAction = 0
            BoydRLQTableIPlus.__LastLambda = round(self._lambda.getValue(), 1)
            return 'C'
        else:  # from the second round
            OppLast = self.getLastResponsePair()[1]
            MyLast = self.getLastResponsePair()[0]
            # Lambda update: any opponent defection raises lambda, any
            # opponent cooperation lowers it.
            if MyLast == 'C' and OppLast == 'C':
                self._lambda.decrementValue()
            elif MyLast == 'C' and OppLast == 'D':
                self._lambda.incrementValue()
            elif MyLast == 'D' and OppLast == 'C':
                self._lambda.decrementValue()
            elif MyLast == 'D' and OppLast == 'D':
                self._lambda.incrementValue()
            self.__CurrentState = round(self._lambda.getValue(), 1)
            FinalDecision = self.learningResult(OppLast, game)
            # NOTE(review): unlike RL QTable I, this stores the UNROUNDED
            # lambda — confirm whether the rounding difference is deliberate.
            BoydRLQTableIPlus.__LastLambda = self._lambda.getValue()
            return FinalDecision

    def learningResult(self, OppLastAction, game):
        """Apply one Q-value update and return the chosen move ('C' or 'D').

        getReward must be called before __LastAction is advanced below.
        """
        Reward = self.getReward(OppLastAction, game)
        self.__CurrentAction = self.getBestAction(self.__CurrentState, game)
        # NOTE(review): this parenthesization applies the discount factor to
        # the whole TD difference — Q += lr * (r + disF * (Q(s',a') - Q(s,a)))
        # — which differs from the standard rule used in RL QTable I
        # (r + disF * Q(s',a') - Q(s,a)).  Confirm it is intentional.
        BoydRLQTableIPlus.__QTable[int(self.__LastState/0.2)][self.__LastAction] += \
            game.RL_lR * (Reward + game.RL_disF *
                          (BoydRLQTableIPlus.__QTable[int(self.__CurrentState/0.2)][self.__CurrentAction] -
                           BoydRLQTableIPlus.__QTable[int(self.__LastState/0.2)][self.__LastAction]))
        # BoydRLQTableIPlus.TrainedQ = BoydRLQTableIPlus.__QTable
        self.__LastState = self.__CurrentState
        self.__LastAction = self.__CurrentAction
        # Action index 0 means cooperate, 1 means defect.
        if self.__CurrentAction == 0:
            return 'C'
        else:
            return 'D'

    def getBestAction(self, state, game):
        """Epsilon-greedy action pick: epsilon decays linearly from 5% to 0
        over game.Iter_N rounds; ties favour cooperation (action 0)."""
        self.__ExplorePercentage = -(5.0 / game.Iter_N) * BoydRLQTableIPlus.__Counter + 5
        if random.randint(0, 99) < self.__ExplorePercentage:
            # Explore: uniformly random action.
            return random.randint(0, 1)
        else:
            # Exploit: best Q-value for this state's bucket.
            if BoydRLQTableIPlus.__QTable[int(state / 0.2)][0] >= BoydRLQTableIPlus.__QTable[int(state / 0.2)][1]:
                return 0
            else:
                return 1

    def getReward(self, OppLast, GAME):
        """Reward for the PREVIOUS action, built from the selected lambda
        coefficients (not the game payoff matrices)."""
        # use the selected parameters
        valLambda = float(BoydRLQTableIPlus.__LastLambda)
        if OppLast == 'C':
            opL = 0
        else:
            opL = 1
        if self.__LastAction == 0 and opL == 0:
            # Mutual cooperation: lambda-weighted blend of the two cc terms.
            return BoydRLQTableIPlus.__ccLambdaA * (1.0 - valLambda) + \
                BoydRLQTableIPlus.__ccLambdaB * valLambda
        elif self.__LastAction == 0 and opL == 1:
            return BoydRLQTableIPlus.__cdLambda * valLambda
        elif self.__LastAction == 1 and opL == 0:
            return BoydRLQTableIPlus.__dcLambda * (1.0 - valLambda)
        else:
            return BoydRLQTableIPlus.__ddLambda * (1.0 - valLambda)

    def initialQ(self, game):
        """Seed the Q-table per lambda bucket from the selected coefficients.

        NOTE(review): these formulas look inconsistent with getReward —
        temp1's defection-by-opponent term reuses ccLambdaB instead of
        cdLambda, and temp2's second term has no dd coefficient at all.
        Confirm against the original tournament code before changing.
        """
        for i in range(0, 6):
            lambda1 = round(i * 0.2, 1)
            # Column 0: estimated value of cooperating in this bucket.
            temp1 = float(BoydRLQTableIPlus.__CoopRatio *
                          (BoydRLQTableIPlus.__ccLambdaA * (1.0 - lambda1) +
                           BoydRLQTableIPlus.__ccLambdaB * lambda1) +
                          (1.0 - BoydRLQTableIPlus.__CoopRatio) *
                          (BoydRLQTableIPlus.__ccLambdaB * lambda1))
            # Column 1: estimated value of defecting in this bucket.
            temp2 = float(BoydRLQTableIPlus.__CoopRatio *
                          BoydRLQTableIPlus.__dcLambda * (1.0 - lambda1) +
                          (1.0 - BoydRLQTableIPlus.__CoopRatio) * lambda1)
            BoydRLQTableIPlus.__QTable.append([temp1, temp2])

    def name(self):
        return "RL QTable I+ from Boyd's Tournament"

    def author(self):
        return "Theodore Boyd"
def __init__(self, game):
    """Initialise the strategy with a uniformly random starting lambda."""
    Strategy.__init__(self)
    start = random.random()  # random initialisation for the lambda
    self._lambda = Lambda(start)
def __init__(self, game):
    """Initialise the strategy with lambda fixed at zero."""
    Strategy.__init__(self)
    initial_value = 0.0  # lambda starts fully at the first game
    self._lambda = Lambda(initial_value)