def default_policy(state, max_depth=15):
    """Score *state* by playing uniformly random moves from it.

    A fresh ``Game`` is loaded with *state* and random actions are applied
    until the game reports ``end``, no action is offered by ``getactions``,
    or ``max_depth`` moves have been played.

    Args:
        state: Board state accepted by ``Game.setstate``.
        max_depth: Maximum number of random moves to play (default 15,
            the value that used to be hard-coded).

    Returns:
        Whatever ``Game.getscore()`` reports after the rollout.
    """
    # TODO: a better way of measuring the reward is needed.
    game = Game()
    game.setstate(state)
    for _ in range(max_depth):
        if game.end:
            break
        actions = getactions(game.getstate())
        # random.choice raises on an empty sequence; stop the rollout instead.
        # len() is used because the container type of `actions` is not known here.
        if len(actions) == 0:
            break
        game.move(random.choice(actions))
    return game.getscore()
def _legal_moves(board):
    """Return the directions 0-3 for which ``just_move`` reports 1 on *board*."""
    # just_move is always handed a copy, as in the original code
    # (presumably because it modifies its argument -- confirm).
    return [d for d in range(4) if just_move(board.copy(), d) == 1]


def MCTStreesearch(state, max_iter=60, max_depth=10):
    """Choose a move for *state* by averaging random rollouts per first move.

    Each iteration picks one legal first move at random, plays it, then
    keeps playing random legal moves until the game ends or more than
    ``max_depth`` moves have been made. The final score is credited to the
    first move, and the legal first move with the best average score wins
    (lowest direction index on ties).

    Args:
        state: Board array; must support ``.copy()`` and be accepted by
            ``Game.setstate``.
        max_iter: Number of rollouts (default 60).
        max_depth: Rollout stops once more than this many moves were
            played (default 10).

    Returns:
        int: A direction in 0-3, or 4 when the position is already over or
        has no legal move.
    """
    available_branches = _legal_moves(state)

    # Decide "dead board" before sampling: choice() on an empty list raises.
    root = Game()
    root.setstate(state)
    if root.end or not available_branches:
        return 4

    branch_scores = [0] * 4
    branch_counts = [0] * 4

    for _ in range(max_iter):
        branch = choice(available_branches)  # pick a first direction at random
        game = Game()
        game.setstate(state)

        # The first move goes down the selected branch.
        game.move(branch)
        depth = 1

        # Afterwards play out randomly.
        while not game.end and depth <= max_depth:
            moves = _legal_moves(game.getstate())
            if not moves:
                # Nothing playable even though the game is not flagged as over.
                break
            game.move(choice(moves))
            depth += 1

        branch_scores[branch] += game.getscore()
        branch_counts[branch] += 1

    def mean_score(branch):
        # An unsampled branch averages to 0 (also avoids dividing by zero).
        return branch_scores[branch] / (branch_counts[branch] or 1)

    # Only legal first moves are candidates, so an unavailable direction can
    # never be returned because of a tie. max() keeps the first maximum, and
    # available_branches is in ascending order.
    return max(available_branches, key=mean_score)
"""Line-based bridge between a Java front end and the Python 2048 AI.

Input:  one line per request holding 16 ints separated by semicolons that
        describe the board; the literal line "end" stops the loop.
Output: one number in 0-4 per request. 0, 1, 2, 3 mean up, right, down,
        left; 4 means the board is dead.

Java only sends the board data. Python handles the AI logic. Scoring and
random tile generation are done on the Java side.
"""
from my2048 import Game
from MCTStree import MCTStreesearch
import numpy as np

if __name__ == "__main__":
    letjit = np.array(
        [[0, 0, 0, 0],
         [0, 2, 2, 0],
         [0, 2, 2, 0],
         [0, 0, 0, 0]],
        dtype='int32',
    )
    # Run the search once up front so that JIT compilation happens before
    # the first real request.
    MCTStreesearch(letjit)

    game = Game()
    while True:
        try:
            message = input()
        except EOFError:
            # The peer closed the pipe without sending "end".
            break
        if message == "end":
            break
        game.build(message)
        state = game.getstate()
        # Flush each reply: stdout is block-buffered when it is a pipe, and
        # the other process is waiting for this line.
        print(MCTStreesearch(state), flush=True)
import matplotlib.pyplot as plt
import matplotlib
from my2048 import Game
from MCTStree import MCTStreesearch
from uct import uct_search

# Use a font that has Chinese glyphs and keep the minus sign rendering correctly.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['axes.unicode_minus'] = False

# Play ten full games with the UCT searcher and record each game's final
# score and largest tile.
index = list(range(10))
score = []
maxtile = []

for _ in index:
    game = Game()
    while True:
        if game.end:
            break
        # Alternative searcher: MCTStreesearch(game.getstate())
        game.move(uct_search(game.getstate(), 60))
    # NOTE(review): assumed to run once per finished game, not once per move.
    print(game.info())
    score.append(game.getscore())
    maxtile.append(game.max())

# One line per metric, plotted against the game number.
plt.xlabel('iter')
plt.ylabel('num')
plt.plot(index, score, color='skyblue', label='游戏分数')
plt.plot(index, maxtile, color='green', label='最大方块值')
plt.legend()
plt.show()
def next_state(state, a):
    """Return the state obtained by applying move *a* to *state*.

    The move is carried out on a fresh ``Game`` that is first loaded with
    *state*; the value of ``Game.getstate()`` afterwards is returned.
    """
    sim = Game()
    sim.setstate(state)
    sim.move(a)
    successor = sim.getstate()
    return successor
def is_terminal(self):
    """Return the ``end`` flag of a fresh ``Game`` loaded with ``self.state``."""
    probe = Game()
    probe.setstate(self.state)
    finished = probe.end
    return finished
next_move = branch else: # otherwise play out randomly available_moves = [] current_state = game.getstate() for i in range(4): if (just_move(current_state.copy(), i) == 1): available_moves.append(i) next_move = choice(available_moves) # keep track of score based on move selection game.move(next_move) depth += 1 branch_counts = np.array(branch_counts) branch_counts = np.where(branch_counts == 0, 1.0, branch_counts) # avoid divide by zero branch_results = np.array(branch_scores) / branch_counts move = np.where(branch_results == np.max(branch_results))[0][0] return move if __name__ == "__main__": t1 = time.time() game = Game() while (not game.end): state = game.getstate() move = MCTStreesearch(state) game.move(move) print(game.info()) t2 = time.time() print(t2 - t1)
import numpy as np
import torch
from section_cnn import CNNformuti
from my2048 import Game

# Tile value for each of the 16 one-hot channels:
# [0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768]
datalist = [2**x for x in range(16)]
datalist[0] = 0

# This is the improved CNN: several convolution kernels filter the input,
# their outputs are concatenated and fed to the fully connected layers, so
# more features can be learned. The input went from one channel to 16
# channels with values mapped into 0~1, which suits training better.
# Accuracy reaches about 70, but actual play is still unsatisfactory. The
# problem may be that the generated data is not really suitable for
# training: the network cannot see the pattern, makes invalid moves and
# then loops forever on the same position.

state_dict = torch.load("pkl/mutichannelCNN_parameter.pkl", map_location='cpu')
game = Game()
net = CNNformuti()
net.load_state_dict(state_dict)
# Inference only: put layers such as dropout / batch norm (if the network
# has any) into evaluation behaviour.
net.eval()

steps = 0
while not game.end:
    game.display()
    grid = game.getstate()

    # One-hot encode the 4x4 board: channel k is 1.0 where the tile equals
    # datalist[k].
    muti_data = np.zeros(shape=(16, 4, 4), dtype='float32')
    for i in range(4):
        for j in range(4):
            v = grid[i, j]
            muti_data[datalist.index(v), i, j] = 1.0

    grid = torch.from_numpy(muti_data.reshape((1, 16, 4, 4)))
    with torch.no_grad():  # no gradients are needed to pick a move
        output = net(grid)
    index = torch.argmax(output)
    # NOTE(review): nothing visible in this loop advances the game; the step
    # that applies `index` is presumably further down -- confirm.
from my2048 import Game, print_grid
from minimax import search_minimax
import numpy as np

# Let the minimax search play one game, showing the board and the chosen
# move before every step.
game = Game()
while True:
    if game.end:
        break
    board = game.getstate()
    print_grid(board)
    bestmove = search_minimax(board)
    print(f"AI suggests bestmove is:{bestmove!s}")
    game.move(bestmove)

# NOTE(review): assumed to run once after the game is over, not once per move.
message = game.info()
print(message)

# Single-position check, kept for manual debugging:
# game.setstate(np.array([[2,2,8,16],
#                         [4,4,16,32],
#                         [2,2,8,64],
#                         [2,4,2,512]],dtype='int32'))
# state = game.getstate()
# print_grid(state)
# bestmove = search_minimax(state)
# print("AI suggests bestmove is:" + str(bestmove))
# game.move(bestmove)