/
game.py
484 lines (420 loc) · 17.1 KB
/
game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
# This class contains the algorithms for all agent techniques.
# Here you can find:
# Greedy Algorithm
# Min Max Algorithm
# Min Max with Alpha Beta Pruning Algorithm
# Reinforcement Learning Algorithm
# Note that this class uses the data structures of another class (board)
# and also modifies them from here.
import board
import random
# Module-level counter for the depth of the search tree (starts at 0)
depth = 0
# Module-level list of learning records shared with the game class;
# addtostack appends entries of the form [x, y, z]
stack = []
class game:
    """Two-player pit game driver: owns the board, the scores and every agent.

    The class offers a naive (random) agent, a greedy agent, a depth-limited
    min-max agent, a min-max agent with alpha-beta cut-offs and a simple
    reinforcement-learning agent.  All board operations are delegated to the
    ``board`` module, whose state is modified from here.
    """

    def __init__(self, no_of_pits, pieces):
        """Create the board and reset both scores and the player to move.

        no_of_pits -- number of pits handed to ``board.board``
        pieces     -- number of pieces per pit handed to ``board.board``
        """
        self.current_game_board = board.board(no_of_pits, pieces)
        self.current_score = [0, 0]
        self.current_player = 0

    def changePlayerTurn(self):
        """Hand the turn to the other player (0 -> 1, 1 -> 0)."""
        self.current_player = (self.current_player + 1) % 2

    def updateScore(self, score):
        """Add ``score`` to the score of the player whose turn it is."""
        self.current_score[self.current_player] += score

    def checkGameStatus(self):
        """Return True while the game is still running.

        The result is False as soon as the board reports ``isEmptyBoard()``,
        i.e. when the game is over.
        """
        return not self.current_game_board.isEmptyBoard()

    def getCurrentWinner(self):
        """Return the index (0 or 1) of the player with the higher score.

        Player 0 is returned when both scores are equal.
        """
        # ``range`` instead of ``xrange``: the latter does not exist on Python 3.
        return max(range(2), key=self.current_score.__getitem__)

    def printMove(self, value):
        """Print the pit chosen by the current player as a 1-based number.

        Values up to 6 are shown as ``value + 1``; larger values are shown as
        ``value - 6``.
        """
        # NOTE(review): the constant 6 presumably matches the 14-pit board
        # created by the driver (7 pits per side) -- confirm for other sizes.
        if value > 6:
            value = value - 6
        else:
            value = value + 1
        print(" Player {0} choosed {1} ".format(self.current_player, value))

    def _playPit(self, pit):
        """Announce ``pit``, play it on the real board, score it, switch turn."""
        self.printMove(pit)
        score = self.current_game_board.move(pit, False, self.current_score)
        self.updateScore(score)
        self.changePlayerTurn()
        return pit

    def _bestImmediateMove(self, board, player):
        """Return ``(move, score)`` of the move with the best immediate score.

        Every candidate is tried on a clone of ``board``.  The first move
        reaching the maximum wins; ``(0, -inf)`` is returned when ``player``
        has no move available.
        """
        best_move = 0
        best_score = float("-inf")
        for move_option in board.getCurrentChoices(player):
            # True marks the move as a trial move on a cloned board.
            gained = board.clone().move(move_option, True, self.current_score)
            if gained > best_score:
                best_score = gained
                best_move = move_option
        return best_move, best_score

    def naiveAgent(self):
        """Play a uniformly random legal pit and return it."""
        pit = random.choice(self.current_game_board.getCurrentChoices(self.current_player))
        return self._playPit(pit)

    def greedyAgent(self):
        """Play the pit with the highest immediate score and return it."""
        pit, _ = self._bestImmediateMove(self.current_game_board, self.current_player)
        return self._playPit(pit)

    def _searchMiniMax(self, board, limit, player, level):
        """Return ``(move, score)`` chosen for ``player`` at tree level ``level``.

        The root is level 1.  On the last examined level the best immediate
        move is taken.  Above it, each move is rated by the ratio of its own
        score to the best score the opponent can reach afterwards, and the
        move with the highest ratio wins.
        """
        # An empty board means the game is over: nothing left to gain.
        if board.isEmptyBoard():
            return 0, 0
        # ``>=`` (rather than ``==``) also stops the search for limit <= 1.
        if level >= limit - 1:
            return self._bestImmediateMove(board, player)

        best_move = 0
        best_score = float("-inf")
        best_ratio = float("-inf")
        opponent = (player + 1) % 2
        for move_option in board.getCurrentChoices(player):
            new_board = board.clone()
            score = new_board.move(move_option, True, self.current_score)
            # The level is passed down explicitly so that sibling branches
            # are all examined at the same depth.
            _, opponent_score = self._searchMiniMax(new_board, limit, opponent, level + 1)
            if opponent_score != 0:
                # float() first: a true ratio even under integer division.
                ratio = float(score) / opponent_score
                if ratio > best_ratio:
                    best_score = score
                    best_ratio = ratio
                    best_move = move_option
            else:
                # The opponent cannot score after this move: take it.
                best_score = score
                best_move = move_option
        return best_move, best_score

    def miniMaxValue(self, board, limit, player, checker=0):
        """Run the depth-limited min-max search for ``player`` on ``board``.

        board   -- board object to search from (it is cloned, never modified)
        limit   -- depth limit of the search tree
        player  -- index of the player to move
        checker -- 1 to get the chosen move, anything else to get its score

        Returns 0 when the board is already empty.
        """
        best_move, best_score = self._searchMiniMax(board, limit, player, 1)
        if checker == 1:
            return best_move
        return best_score

    def minMaxAgent(self, limit):
        """Play the pit chosen by ``miniMaxValue`` with depth ``limit``."""
        pit = self.miniMaxValue(self.current_game_board, limit, self.current_player, 1)
        return self._playPit(pit)

    def _searchAlphaBeta(self, board, limit, player, alpha, beta, level):
        """Return ``(move, score)`` like ``_searchMiniMax`` with cut-offs.

        A move whose immediate score is ``<= alpha`` or ``>= beta`` ends the
        search of this node at once and is returned together with its score.
        """
        if board.isEmptyBoard():
            return 0, 0
        if level >= limit - 1:
            return self._bestImmediateMove(board, player)

        best_move = 0
        best_score = float("-inf")
        best_ratio = float("-inf")
        opponent = (player + 1) % 2
        for move_option in board.getCurrentChoices(player):
            new_board = board.clone()
            score = new_board.move(move_option, True, self.current_score)
            # Outside the (alpha, beta) window: prune the rest of this node.
            if score <= alpha or score >= beta:
                return move_option, score
            _, opponent_score = self._searchAlphaBeta(
                new_board, limit, opponent, alpha + score, beta + score, level + 1)
            if opponent_score != 0:
                ratio = float(score) / opponent_score
                if ratio > best_ratio:
                    best_score = score
                    best_ratio = ratio
                    best_move = move_option
            else:
                best_score = score
                best_move = move_option
        return best_move, best_score

    def GetAlphaBetaScore(self, board, limit, player, alpha, beta, checker=0):
        """Run the min-max search with alpha-beta cut-offs.

        board       -- board object to search from (it is cloned)
        limit       -- depth limit of the search tree
        player      -- index of the player to move
        alpha, beta -- bounds of the score window
        checker     -- 1 to get the chosen move, anything else to get its score

        Returns 0 when the board is already empty.
        """
        best_move, best_score = self._searchAlphaBeta(board, limit, player, alpha, beta, 1)
        if checker == 1:
            return best_move
        return best_score

    def AlphaBetaPlay(self, limit):
        """Play the pit chosen by ``GetAlphaBetaScore`` with depth ``limit``."""
        alpha = -float('inf')
        beta = -alpha
        pit = self.GetAlphaBetaScore(self.current_game_board, limit, self.current_player, alpha, beta, 1)
        return self._playPit(pit)

    def addtostack(self, x, y, z):
        """Append the record ``[x, y, z]`` to the module-level ``stack``."""
        global stack
        stack.append([x, y, z])

    def rlagent(self, board):
        """Play a pit for the reinforcement-learning agent and return it.

        board -- sequence describing the current board state; it is compared
                 (as a tuple) with the state stored in each ``stack`` record

        A random legal pit is the default.  Every record with the same state,
        a reward >= 0 and a move that is legal now gets its reward increased
        by one and overrides the choice, so the last such record wins.
        """
        global stack
        moves = self.current_game_board.getCurrentChoices(self.current_player)
        pit = random.choice(moves)
        state = tuple(board)
        for record in stack:
            if record[0] != state:
                continue
            if record[2] >= 0 and record[1] in moves:
                record[2] = record[2] + 1
                pit = record[1]
        return self._playPit(pit)

    def trainAgent(self):
        """Play one random move and store it in ``stack`` with its reward.

        The record is ``(state before the move, pit, reward)`` where the
        reward is 1 when the move scored and -1 otherwise.
        """
        state = tuple(self.current_game_board.board)
        pit = random.choice(self.current_game_board.getCurrentChoices(self.current_player))
        score = self.current_game_board.move(pit, False, self.current_score)
        reward = 1 if score > 0 else -1
        self.updateScore(score)
        # Store the pit itself: rlagent replays the stored value as a pit
        # index, so an offset of one would make it play the neighbouring pit.
        self.addtostack(state, pit, reward)
        self.changePlayerTurn()

    def _loadTrainingData(self, path):
        """Append the ``[state, move, reward]`` records of ``path`` to ``stack``.

        A missing or unreadable file and malformed lines are skipped, so the
        first run works without previously trained data.
        """
        import ast
        global stack
        try:
            with open(path, "r") as handle:
                lines = handle.readlines()
        except IOError:
            return
        for line in lines:
            text = line.strip()
            if not text:
                continue
            try:
                # literal_eval only accepts Python literals, never code.
                record = ast.literal_eval(text)
            except (ValueError, SyntaxError):
                continue
            if isinstance(record, list) and len(record) == 3:
                stack.append(record)

    def _askAgent(self, index):
        """Ask on the console which agent (1-5) plays for player ``index``."""
        while True:
            try:
                choice = int(input("Enter number for player {0}:".format(index)))
            except ValueError:
                # Only bad numbers are retried; Ctrl-C and EOF still end the program.
                print("Please provide valid input")
                continue
            if choice in (1, 2, 3, 4, 5):
                return choice
            print("Please enter correct number")

    def _runAgent(self, agent_id):
        """Let the agent selected by ``agent_id`` play and return its pit."""
        if agent_id == 1:
            return self.naiveAgent()
        if agent_id == 2:
            return self.greedyAgent()
        if agent_id == 3:
            return self.minMaxAgent(2)
        if agent_id == 4:
            return self.AlphaBetaPlay(2)
        if agent_id == 5:
            return self.rlagent(self.current_game_board.board)
        return 0

    def Play(self):
        """Run one complete game on the console.

        Loads the records of ``data.txt``, asks which agent plays for each
        player, plays until the game is over, appends the moves of this game
        to ``data.txt`` and prints the result.
        """
        self._loadTrainingData("data.txt")
        print("Enter 1 for Naive Agent")
        print("Enter 2 for Greedy Agent")
        print("Enter 3 for Min Max Agent")
        print("Enter 4 for Min Max with alpha beta Agent")
        print("Enter 5 for reinforcement learning Agent")
        player = [self._askAgent(0), self._askAgent(1)]
        print("----------------------------------------------------")
        print(" Initial Board:")
        print("----------------------------------------------------")
        self.current_game_board.printBoard(self.current_score)

        # One list of [state, pit, reward] records per player.
        history = [[], []]
        while self.checkGameStatus():
            mover = self.current_player
            # The state is captured before the move because rlagent looks up
            # the board it is about to move on.
            state_before = tuple(self.current_game_board.board)
            pit = self._runAgent(player[mover])
            history[mover].append([state_before, pit, 0])
            self.current_game_board.printBoard(self.current_score)

        # Reward the moves of the winner only; a draw rewards nobody.
        if self.current_score[0] > self.current_score[1]:
            rewarded = history[0]
        elif self.current_score[1] > self.current_score[0]:
            rewarded = history[1]
        else:
            rewarded = []
        for record in rewarded:
            record[2] = record[2] + 1

        self.current_game_board.getAllPiecesElements(self.current_score)
        # Append the states and moves of this game to the data file.
        with open("data.txt", "a") as data_file:
            for record in history[0] + history[1]:
                data_file.write(str(record))
                data_file.write("\n")
        self.printGame()

    def printGame(self):
        """Print the final board followed by the result of the game.

        The result is "Player 0 won", "Player 1 won" or a draw, depending on
        the current scores.
        """
        print("----------------------------------------------------")
        print(" Final Board:")
        print("----------------------------------------------------")
        self.current_game_board.printBoard(self.current_score)
        if self.current_score[0] > self.current_score[1]:
            verdict = " Player 0 won!! "
        elif self.current_score[1] > self.current_score[0]:
            verdict = " Player 1 won!! "
        else:
            verdict = " It's a draw!! "
        print("")
        print("##########################################################")
        print(verdict)
        print("##########################################################")
        print("")

    def __main__(self):
        """Entry point used by the driver code: play one game."""
        self.Play()
# Driver code: runs as soon as this module is executed or imported.
# It creates the game and starts playing.
# A different number of pits or pieces per pit can be passed to game().
# Here 14 is the number of pits and
# 5 is the number of pieces in each pit.
newgame = game(14, 5)
newgame.__main__()