def __init__(self, param=None): self.HOME_PATH = param self.mct = MCTS(5, MCTconfig()) self.gr = GR() self.gcnt = Count() self.epsilon = 0.3 # epsilon greedy self.mem = BrainMemory(50, self.HOME_PATH)
class Brain: def __init__(self, param=None): self.HOME_PATH = param self.mct = MCTS(5, MCTconfig()) self.gr = GR() self.gcnt = Count() self.epsilon = 0.3 # epsilon greedy self.mem = BrainMemory(50, self.HOME_PATH) def getAction(self, cmd): # cmd.body==(board,turn) state = cmd.body[0].copy() turn = cmd.body[1] winner = cmd.body[2] if len(cmd.body[3]) > 0: othercmd = cmd.body[3] if othercmd[0] == 1: # write model p = os.path.join(self.HOME_PATH, "lucas_model") self.mct.nn.writeModel(p) elif othercmd[0] == 2: # read model p = os.path.join(self.HOME_PATH, "lucas_model") self.mct.nn.loadModel(p) return GameSignal(2, None) if len(winner) > 0: self.winnerKnown(winner[0]) self.mct.nn.train(self.mem.usable_memory) newsig = GameSignal(2, None) return newsig self.gcnt.incrs() self.mct.addRoot(copy.deepcopy(state), turn) pi, v = self.mct.getPiV() pi = np.array(pi) actn_list = pi[:, 0] v_list = pi[:, 1] m = np.argwhere(v_list == max(v_list)) # epsilon greedy if np.random.random() < self.epsilon: m = np.random.randint(0, len(actn_list)) m = actn_list[m] actn = int(m + 0.5) move = self.gr.actn2move(actn, state, turn) self.gcnt.incrs() newsig = GameSignal(1, move) pi = [0] * 577 j = 0 for i in actn_list: k = int(i + 0.5) pi[k] = v_list[j] j += 1 self.mem.addOneFrame(state, pi, turn) return newsig def winnerKnown(self, winner): self.mem.buildUsableMemory(winner, self.mct.config.maxPossibleReward) def selfLearn(self, cmd): state = cmd[1][0] turn = cmd[1][1] mcts.addRoot(state, turn) pi, v = mcts.getPiV() pi = np.array(pi) ## -- .... move = gr.getMoveWithActn(state, turn, m) return move def observe(self, state): pass
def __init__(self, sim_num,config): self.gr=GR() self.sim_num=sim_num self.config=config self.nn=MyModel(ModelConfig())
class MCTS(): def __init__(self, sim_num,config): self.gr=GR() self.sim_num=sim_num self.config=config self.nn=MyModel(ModelConfig()) def addRoot(self, state,turn): self.tree={'Nodes':[], 'Edges':[] } self.expandLeafNode(state,turn,None) self.simulate() def expandLeafNode(self,state,turn,parentIndex,path=[]): st=copy.deepcopy(state) rn=Node(st,turn,parentIndex) self.tree['Nodes'].append(rn) a=self.gr.getAllowedMoves(copy.deepcopy(rn.state),turn,'actn') if len(a)==0: self.tree['Edges'].append([]) v=-self.config.maxPossibleReward else: actionValue=self.nn.evaluate(copy.deepcopy(rn.state),rn.turn) v=actionValue[-1] p=[] for actn in a: e=Edge(parentIndex=0,actn=actn) e.N=0 e.W=0 e.Q=0 e.P=actionValue[actn] e.parentIndex=parentIndex e.turn=turn p.append(e) self.tree['Edges'].append(p) # backfill if len(path)>0: leafplayer=turn for edge in path: if edge.turn==turn: edge.W+=v else: edge.W+=-v edge.N+=1 edge.Q=edge.W/edge.N def simulate(self): for sn in range(self.sim_num ): cix = 0 # current node's index path=[] # -- determine a leaf node t=0 while len(self.tree['Edges'][cix])!=0 and t<self.config.L: t+=1 edges=self.tree['Edges'][cix] maxqu = -self.config.inf sum_N=0 for g in edges: sum_N+=g.N choosen=0 for i,g in enumerate(edges): u = self.config.cpuct * g.P *np.sqrt(sum_N)/(1+g.N) q = g.Q squ=q+u if squ>maxqu: choosen=g.actn choosenEdge=g path.append(choosenEdge) if choosenEdge.N==0: # leaf node break else: cix=choosenEdge.childIndex # -- add new State to tree if len(self.tree['Edges'][cix])==0: # end game turn=self.tree['Nodes'][cix].turn v=-self.config.maxPossibleReward if len(path)>0: for edge in path: if edge.turn==turn: edge.W+=v else: edge.W+=-v edge.N+=1 edge.Q=edge.W/edge.N else: # unexpanded node turn=self.tree['Nodes'][cix].turn newst=self.gr.takeAction(self.tree['Nodes'][cix].state, choosen,turn) choosenEdge.childIndex=len(self.tree['Nodes']) self.expandLeafNode(newst,-turn,cix,path) def getPiV(self): if len(self.tree['Nodes'])==0: return pi=[] sumn=0 v=-self.config.inf for e in self.tree['Edges'][0]: sumn+=e.N if e.Q>v: v=e.Q for e in self.tree['Edges'][0]: pi.append([e.actn,e.N/sumn]) return pi, v
else: edge.W+=-v edge.N+=1 edge.Q=edge.W/edge.N else: # unexpanded node turn=self.tree['Nodes'][cix].turn newst=self.gr.takeAction(self.tree['Nodes'][cix].state, choosen,turn) choosenEdge.childIndex=len(self.tree['Nodes']) self.expandLeafNode(newst,-turn,cix,path) def getPiV(self): if len(self.tree['Nodes'])==0: return pi=[] sumn=0 v=-self.config.inf for e in self.tree['Edges'][0]: sumn+=e.N if e.Q>v: v=e.Q for e in self.tree['Edges'][0]: pi.append([e.actn,e.N/sumn]) return pi, v if __name__=="__main__": gr=GR() mct=MCTS(5, MCTconfig()) mct.addRoot(gr.initialBoard,1) print(mct.getPiV())
def __init__(self, HumanPlayWhite=True, HumanPlayBlack=True): # -- path configure if getattr(sys, 'frozen', False): self.HOME_PATH = os.path.dirname(sys.executable) elif __file__: self.HOME_PATH = os.path.dirname(__file__) # -- vital assets tk.Tk.__init__(self, None) self.gr = GR() # -- sync with other module self.gcnt = Count() self.gameMode = 0 self.engineTurn = 0 # init self.othercmd = [] self.generate_move_flag = 0 # gui control self.freezeboard = 0 self.clickstate = 0 #0 wait for click, 1 valid click once, 2 valid click twice # -- game parameters self.currentstep = 0 self.bplayer = int(HumanPlayBlack) # human 1, ai 0 self.wplayer = int(HumanPlayWhite) self.cols = 6 self.rows = 6 self.img_backup = {} self.pieces = {} self.state = [[0, 0, 0, -1, 0, 0], [0, 0, 0, 0, 0, 0], [-1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0]] # -- ui #super(ChessBoard, self).__init__() self.title("Lucas MAchess") # ui.0 controllers # ui.0.0 menubar self.MAmenuBar = tk.Menu(self) self.config(menu=self.MAmenuBar) filemenu = tk.Menu(self.MAmenuBar) settingmenu = tk.Menu(self.MAmenuBar) self.MAmenuBar.add_cascade(label="File", menu=filemenu) self.MAmenuBar.add_cascade(label='Settings', menu=settingmenu) filemenu.add_command(label="open") filemenu.add_command(label="save") settingmenu.add_command(label='load model', command=self.load_model) settingmenu.add_command(label='write model', command=self.write_model) # ui.0.1 button self.button1 = tk.Label(text='Generate Move') self.button1.bind("<ButtonRelease>", self.generate_move) self.button1.pack() self.button2 = tk.Label(text='restart') self.button2.bind("<ButtonRelease>", self.userRestartGame) self.button2.pack() self.button3 = tk.Label(text='toggle self-play') self.button3.bind("<ButtonRelease>", self.toggleSelfTraining) self.button3.pack() # ui.1 canvas canvasBgColor = "#00ff00" self.cellSize = 64 self.whiteCellColor = "#ffd480" self.blackCellColor = "#8b4513" self.pieceid2name = {1: "block", 2: "king"} self.loadPieces() self.canvasHeight = self.rows * self.cellSize self.canvasWidth = self.cols * self.cellSize self.canvas = tk.Canvas(width=self.canvasWidth, height=self.canvasHeight, background=canvasBgColor) self.canvas.pack() self.canvas.bind("<Configure>", self.resizeCanvas) self.canvas.bind("<ButtonRelease>", self.mouseclicked) # ui.2 label self.label1 = tk.Label(self, text='Game Result Pending ...') self.label1.pack() self.status = 3 # start ; 0 restart, 1 pending, 2 endofgame self.turn = self.gr.initialTurn self.self_train_flag = False self.setBasedOnStatus()
class ChessBoard(tk.Tk): # -- internal def __init__(self, HumanPlayWhite=True, HumanPlayBlack=True): # -- path configure if getattr(sys, 'frozen', False): self.HOME_PATH = os.path.dirname(sys.executable) elif __file__: self.HOME_PATH = os.path.dirname(__file__) # -- vital assets tk.Tk.__init__(self, None) self.gr = GR() # -- sync with other module self.gcnt = Count() self.gameMode = 0 self.engineTurn = 0 # init self.othercmd = [] self.generate_move_flag = 0 # gui control self.freezeboard = 0 self.clickstate = 0 #0 wait for click, 1 valid click once, 2 valid click twice # -- game parameters self.currentstep = 0 self.bplayer = int(HumanPlayBlack) # human 1, ai 0 self.wplayer = int(HumanPlayWhite) self.cols = 6 self.rows = 6 self.img_backup = {} self.pieces = {} self.state = [[0, 0, 0, -1, 0, 0], [0, 0, 0, 0, 0, 0], [-1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0]] # -- ui #super(ChessBoard, self).__init__() self.title("Lucas MAchess") # ui.0 controllers # ui.0.0 menubar self.MAmenuBar = tk.Menu(self) self.config(menu=self.MAmenuBar) filemenu = tk.Menu(self.MAmenuBar) settingmenu = tk.Menu(self.MAmenuBar) self.MAmenuBar.add_cascade(label="File", menu=filemenu) self.MAmenuBar.add_cascade(label='Settings', menu=settingmenu) filemenu.add_command(label="open") filemenu.add_command(label="save") settingmenu.add_command(label='load model', command=self.load_model) settingmenu.add_command(label='write model', command=self.write_model) # ui.0.1 button self.button1 = tk.Label(text='Generate Move') self.button1.bind("<ButtonRelease>", self.generate_move) self.button1.pack() self.button2 = tk.Label(text='restart') self.button2.bind("<ButtonRelease>", self.userRestartGame) self.button2.pack() self.button3 = tk.Label(text='toggle self-play') self.button3.bind("<ButtonRelease>", self.toggleSelfTraining) self.button3.pack() # ui.1 canvas canvasBgColor = "#00ff00" self.cellSize = 64 self.whiteCellColor = "#ffd480" self.blackCellColor = "#8b4513" self.pieceid2name = {1: "block", 2: "king"} self.loadPieces() self.canvasHeight = self.rows * self.cellSize self.canvasWidth = self.cols * self.cellSize self.canvas = tk.Canvas(width=self.canvasWidth, height=self.canvasHeight, background=canvasBgColor) self.canvas.pack() self.canvas.bind("<Configure>", self.resizeCanvas) self.canvas.bind("<ButtonRelease>", self.mouseclicked) # ui.2 label self.label1 = tk.Label(self, text='Game Result Pending ...') self.label1.pack() self.status = 3 # start ; 0 restart, 1 pending, 2 endofgame self.turn = self.gr.initialTurn self.self_train_flag = False self.setBasedOnStatus() def userRestartGame(self, event=None): self.restart = 1 self.setBasedOnStatus() def generate_move(self, event=None): self.generate_move_flag = 1 def setBasedOnStatus(self): if self.status == 0: if self.restart == 1: self.state = copy.deepcopy(self.gr.initialBoard) self.currentstep = 0 self.turn = self.gr.initialTurn self.gameResult = self.gr.initialGameResult self.trace = {0: [self.state, None]} self.restart = 0 print( "New Game: step %d, turn: %d, result: %d, last move: %s status: %d" % (self.currentstep, self.turn, self.gameResult, [-1], self.status)) elif self.turn != self.gr.initialTurn: self.status = 1 self.setBasedOnStatus() elif self.status == 1: if self.restart == 1: self.status = 0 self.setBasedOnStatus() else: self.gameResult = self.gr.judge(self.state, self.turn) if self.gameResult != 0: self.status = 2 self.setBasedOnStatus() elif self.status == 2: if self.restart == 1: self.status = 0 self.setBasedOnStatus() elif self.status == 3: self.restart = 0 self.gameResult = self.gr.initialGameResult if self.turn != self.gr.initialTurn: self.status = 1 self.setBasedOnStatus() self.update() def loadPieces(self): #img=Image.open("images\\aaa.png",Image.ANTIALIAS) root_dir = os.path.join(self.HOME_PATH, 'images', 'MAchess') name = 'king' #print(fullname) #img=Image.open(fullname) #img=ImageTk.PhotoImage(file=fullname) #img=tk.PhotoImage(img) wname = os.path.join(root_dir, "whitepieces", name + '.png') bname = os.path.join(root_dir, "blackpieces", name + '.png') #bname=root_dir+"blackpieces\\"+name+'.png' imgw = Image.open(wname) imgw = ImageTk.PhotoImage(imgw) self.img_backup.update({1: imgw}) imgb = Image.open(bname) imgb = ImageTk.PhotoImage(imgb) self.img_backup.update({-1: imgb}) blkname = os.path.join(root_dir, "block.png") imgblk = Image.open(blkname) imgblk = ImageTk.PhotoImage(imgblk) self.img_backup.update({2: imgblk}) #self.pieceColorInvert() #self.pieces.update({"nothing":img}) def placepiece(self, row, column, id=None, natural_name="nothing"): x = (column * self.cellSize) y = (row * self.cellSize) self.canvas.create_image(x, y, image=self.img_backup[id], tags="pieces", anchor='nw') def move(self): if self.freezeboard != 0: return if self.judgeclick() == False: return r1, c1 = self.select1 r2, c2 = self.select2 r3, c3 = self.select3 t = self.state[r1][c1] self.state[r1][c1] = 0 self.state[r2][c2] = t self.state[r3][c3] = 2 self.generate_move_flag = 0 self.turn = -self.turn self.currentstep += 1 self.setBasedOnStatus() print( "New State: step %d, turn: %d, result: %d, last move: %s status: %d" % (self.currentstep, self.turn, self.gameResult, [r1, c1, r2, c2, r3, c3], self.status)) self.update() def update(self): # board minwh = min(self.canvasHeight, self.canvasWidth) self.canvas.delete("pieces") self.cellSize = int(minwh / self.rows) color_list = [self.whiteCellColor, self.blackCellColor] for i in range(self.rows): x1 = i * self.cellSize x2 = x1 + self.cellSize - 1 for j in range(self.cols): currentColorIndex = (i + j) % 2 y1 = j * self.cellSize y2 = y1 + self.cellSize self.canvas.create_rectangle( x1, y1, x2, y2, outline='black', fill=color_list[currentColorIndex], tag='cells') for i in range(self.rows): for j in range(self.cols): s = self.state[i][j] if s != 0: self.placepiece(i, j, s) self.canvas.tag_raise("pieces") self.canvas.tag_lower("cells") # game result self.victoryWindow() def victoryWindow(self): c = self.status if c != 2: self.label1['text'] = 'Game Result Pending ...' return c = self.gameResult if c == 1: self.label1['text'] = 'White Win!' else: self.label1['text'] = 'Black Win!' print("End of one Iteration") def setState(self, state): if self.freezeboard == 1: return self.state = state self.update() def on_closing(self): pass # -- human interaction def write_model(self, event=None): self.othercmd = [1] def load_model(self, event=None): self.othercmd = [2] def resizeCanvas(self, event): # -- resize and re-draw self.canvasWidth = event.width self.canvasHeight = event.height self.update() def mouseclicked(self, event): row = event.y // self.cellSize col = event.x // self.cellSize self.lastrow = row self.lastcol = col self.respondClick() def respondClick(self): row = self.lastrow col = self.lastcol if self.state[row][col] != 0: if self.clickstate == 0: self.clickstate = 1 self.select1 = [row, col] return elif self.clickstate == 1: self.clickstate = 0 return else: self.clickstate = 0 if row == self.select1[0] and col == self.select1[1]: self.select3 = self.select1 self.move() return else: if self.clickstate == 1: self.clickstate = 2 self.select2 = [row, col] return elif self.clickstate == 2: self.clickstate = 0 self.select3 = [row, col] self.move() def judgeclick(self): r1 = self.select1[0] c1 = self.select1[1] if self.state[r1][c1] != self.turn: return False # move1 to move2 r2 = self.select2[0] c2 = self.select2[1] dr = r2 - r1 dc = c2 - c1 if dr == 0: dir = (c2 - c1) // abs(c2 - c1) i = c1 + dir while i != c2: if self.state[r1][i] != 0: return False i += dir if self.state[r2][c2] != 0: return False elif dc == 0: dir = (r2 - r1) // abs(r2 - r1) i = r1 + dir while i != r2: if self.state[i][c1] != 0: return False i += dir if self.state[r2][c2] != 0: return False else: if (abs(dr) != abs(dc)): return False dir1 = (r2 - r1) // abs(r2 - r1) dir2 = (c2 - c1) // abs(c2 - c1) j = c1 + dir2 for i in range(r1 + dir1, r2 + dir1, dir1): if self.state[i][j] != 0: return False j += dir2 # move2 to move3 dr = self.select3[0] - self.select2[0] dc = self.select3[1] - self.select2[1] if abs(dr) > 1 or abs(dc) > 1: return False return True def toggleSelfTraining(self, event=None): self.self_train_flag = not self.self_train_flag # -- engine part def engineApplyMove(self, body): print('engine move %d : %s' % (self.currentstep, body)) # set-up b = body self.select1 = [b[0], b[1]] self.select2 = [b[2], b[3]] self.select3 = [b[4], b[5]] self.move() def getBattleInfo(self): winner = [] if self.self_train_flag == True: self.generate_move_flag = 1 if self.generate_move_flag == 1: cmd = -1 elif self.gameMode == 0: cmd = 0 # do nothing elif self.gameMode == 1: # human vs engine if self.turn == self.engineTurn: cmd = -1 # need action else: cmd = 0 if self.gameResult in self.gr.nonePendingResults: winner.append(copy.deepcopy(self.gameResult)) if self.generate_move_flag == 1: self.generate_move_flag = 0 self.userRestartGame() cmd = -1 state = copy.deepcopy(self.state) u = [] if len(self.othercmd) > 0: u = copy.deepcopy(self.othercmd) self.othercmd = [] cmd = -1 return [cmd, state, self.turn, winner, u] def freeze(self): self.freezeboard = True def defreeze(self): self.freezeboard = False # -- util function def getMove(self, event): r1, c1 = self.select1 r2, c2 = self.select2 r3, c3 = self.select3 return [r1, c1, r2, c2]