def main(self, board, now_turn, playouts):
    node = self.root
    if not self.is_expanded(node):  # and node.is_leaf()  # node.state
        # print('Expanding Root Node...')
        situation = [[i == 1, i == 2, now_turn == 1, now_turn == 2]
                     for j in node.grid for i in j]
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x=np.array([situation], dtype=np.float32), shuffle=False)  # numpy assumed imported as np
        # predict() is assumed here to yield (action_probs, value) as produced by the model_fn
        action_probs, value = self.estimator.predict(input_fn)
        if now_turn == 2:
            value = -value
        i = 0
        b = board(grid=node.grid)
        moves = []
        while i <= 169:  # including 169, pass
            if b.ok(i // 13, i % 13, now_turn):
                moves.append((i // 13, i % 13))
            i += 1
        del b
        node.expand(moves, action_probs, now_turn)
        self.expanded.add(node)
    # now_turn stays the player to move at the root; start_tree_search flips it
    # itself when it descends to a child
    coroutine_list = []
    for _ in range(playouts):
        coroutine_list.append(self.tree_search(node, now_turn))
    coroutine_list.append(self.prediction_worker())
    self.loop.run_until_complete(asyncio.gather(*coroutine_list))
def __init__(self, search_threads, estimator):
    self.root = node(x=None, y=None, color=2, depth=0)
    self.virtual_loss = 3
    self.now_expanding = set()
    self.expanded = set()
    self.sem = asyncio.Semaphore(search_threads)
    self.queue = Queue(search_threads)  # prediction queue
    self.loop = asyncio.get_event_loop()
    self.running_simulation_num = 0
    self.board = board()
    self.estimator = estimator
def expand(self, moves, P, color):
    total = 1e-8  # avoid dividing by zero if every prior gets filtered out
    P = P.flatten()  # P is an array from tensorflow
    for x, y in moves:
        # build a fresh board from this node's grid for every candidate move,
        # so playing the move never mutates the parent's grid
        b = board(grid=self.grid)
        if 0 <= x < 13 and 0 <= y < 13:
            b.play(x, y, color)
            p = P[x * 13 + y]
            newnode = node(self, x, y, color, p, b.grid, depth=self.depth + 1)
            self.child[(x, y)] = newnode
            total += p
    for i in self.child.values():
        i.p /= total  # renormalise the priors over the legal moves
def __init__(self, parent=None, x=None, y=None, color=2, p=1.0, grid=None, depth=0):
    # action 169 is pass
    global c_puct
    self.grid = grid  # the grid after the move this node represents has been played
    self.depth = depth
    self.parent = parent
    self.x = x
    self.y = y
    self.color = color
    self.p = p
    self.W = 0
    self.N = 0
    self.Q = 0
    self.child = {}  # {action (x, y): child node, ...}
    self.update_u(c_puct)
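# select() and backup() on node, plus update_u() used above, are not shown in this
# excerpt. They are expected to implement the standard PUCT rule from the AlphaGo
# Zero paper: pick the child maximising Q + U, with U = c_puct * P * sqrt(N_parent)
# / (1 + N_child). A self-contained sketch of that selection rule (puct_select and
# its toy inputs are illustrative assumptions, not this project's code):
from math import sqrt

def puct_select(children, c_puct=5.0):
    """children: dict mapping action -> (P, N, W); returns the action with max Q + U."""
    n_parent = sum(n for _, n, _ in children.values()) + 1

    def score(stats):
        p, n, w = stats
        q = w / n if n else 0.0
        u = c_puct * p * sqrt(n_parent) / (1 + n)
        return q + u

    return max(children, key=lambda a: score(children[a]))

# three candidate moves as (prior, visit count, total value); the unvisited move
# with a decent prior wins on its U term
print(puct_select({(0, 0): (0.5, 10, 4.0), (1, 1): (0.3, 2, 1.5), (3, 3): (0.2, 0, 0.0)}))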
from random import randint
from rule import board


def makegame(num):
    myfile = open("random.mg", "w")
    i = 0
    while i < num:
        if i % 100 == 0:
            print(i * 100 // num, '%')  # progress
        b = board(6.5)
        count = 0
        passes = 0
        color = 2
        while count < 300:
            color = (not (color - 1)) + 1  # switch player
            notok = []
            j = randint(0, 170)
            while j in notok or not b.ok(j // 13, j % 13, color):
                notok.append(j)
                j = randint(0, 170)
            b.play(j // 13, j % 13, color)
            myfile.write(str(j) + ' ')
            count += 1
        i += 1
        score = b.final()
        myfile.write(str(score) + '\n')
    myfile.close()
def __init__(self, playout=1600, in_batch_size=512, exploration=True,
             in_search_threads=16, processor="gpu"):
    self.epochs = 5
    self.playout_counts = playout  # 400 #800 #1600 200
    # self.temperature = 1  # 1e-8 1e-3
    # I don't want to use a temperature: as it approaches zero, simply picking the
    # move with the largest N is good enough.
    self.batch_size = in_batch_size  # 128 #512
    self.start_steps = 30  # while the temperature is 1
    self.start_temperature = 1  # 2
    self.Dirichlet = 0.3  # P(s,a) = (1 - ϵ)p_a + ϵη_a  # self-play section of the paper
    '''
    self.eta = 0.03
    self.epsilon = 0.25
    self.v_resign = 0.05
    self.c_puct = 5
    self.learning_rate = 0.001  # 5e-3 # 0.001
    self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
    '''
    self.buffer_size = 10000
    self.data_buffer = deque(maxlen=self.buffer_size)
    self.game_borad = board()
    if is_using_TPU:
        self.policy_value_network = tf.estimator.tpu.TPUEstimator(
            model_fn=resnet_fn_tpu, model_dir="MGmodels")
    else:
        self.policy_value_network = tf.estimator.Estimator(
            model_fn=resnet_fn, model_dir="/home/kenny/Desktop/python3/MG/MGmodels")
    self.search_threads = in_search_threads
    self.mcts = mcts_tree(self.search_threads, self.policy_value_network)
    self.exploration = exploration
from player_random import player as randomplayer
from player_mcts import player as mctsplayer
from player_mcts import node, mcts_tree
from rule import board

root = node()
t = mcts_tree(root)
mcts = mctsplayer(t)
random = randomplayer()
b = board()


def agame():
    while 1:
        y = mcts.genmove(1)
        if y is None:
            return b.win()
        b.play(y, 1)
        b.dump()
        if b.win() != 0:
            return b.win()
        random.play(y, 1)
        y = random.genmove(2)
        if y is None:
            return b.win()
        b.play(y, 2)
        b.dump()
        if b.win() != 0:
            return b.win()
        mcts.play(y, 2)
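# agame() is defined above but never invoked in this excerpt. A possible driver
# (an assumption, not part of the original script): play one game and print the
# winner reported by board.win().
if __name__ == "__main__":
    print("winner:", agame())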
from rule import board
import numpy as np
import randomgame
from copy import deepcopy as dc
from math import ceil

randomgame.makegame(1000)
myfile = open("random.mg", "r")
a = myfile.readlines()
out = []
results = []
for i in a:
    out.append([int(j) for j in i.split()[0:-1]])
    results.append(float(i.split()[-1]))
print(len(results))
b = board(6.5)
abatch = 1000
index = 0
batches = int(ceil(len(out) / float(abatch)))
for now in range(batches):
    indata = []
    resultdata = []
    chancedata = []
    batch = out[now * abatch:now * abatch + abatch]
    for agame in batch:
        print(index)
        b.grid = []
        for i in range(13):
            b.grid.append([0] * 13)
        color = 1
        for coor in agame:  # coordinate
            b.play(coor // 13, coor % 13, color)
class tree:
    under = []  # son nodes, so it's a list
    on = []  # father node
    used = []  # I use lists to save the search results, so entries are kept even after they stop being valuable
    x = []
    y = []
    color = []
    win = []  # win points for black, summed over every son node, not an average
    times = []  # total search count
    nowboard = board(6.5)  # not for searching

    def appendnode(self):
        self.x.append(None)
        self.y.append(None)
        self.color.append(None)
        self.on.append(None)
        self.under.append([])
        self.used.append(0)
        self.win.append(None)
        self.times.append(0)

    def expand(self, getf, nowlocal, board, turn):
        # expand a leaf node; nowlocal is the index of the node the board belongs to.
        # getf is a function which returns black's win points
        for x in range(13):
            for y in range(13):
                if board.ok(x, y, turn):
                    i = 0
                    while self.used[i]:  # find an available slot to reuse
                        if i == len(self.under) - 1:
                            self.appendnode()
                        i += 1
                    self.x[i] = x
                    self.y[i] = y
                    self.color[i] = turn
                    self.on[i] = nowlocal
                    self.under[nowlocal].append(i)
                    self.under[i] = []
                    self.used[i] = 1
                    self.win[i] = getf(board, turn) - board.komi
                    self.times[i] = 1
        i = 0
        while self.used[i]:  # find an available slot to reuse (for the pass move)
            if i == len(self.under) - 1:
                self.appendnode()
            i += 1
        self.x[i] = 13
        self.y[i] = 13
        self.color[i] = None
        self.on[i] = nowlocal
        self.under[nowlocal].append(i)
        self.under[i] = []
        self.used[i] = 1
        self.win[i] = getf(board, (not (turn - 1)) + 1) - board.komi
        self.times[i] += 1

    def search(self, getf, board, inl, d=0):  # inl is the input local, the list index
        nowlocal = inl
        while self.under[nowlocal] != []:
            qu = []  # q + u
            for i in self.under[nowlocal]:
                if self.times[i] == 0:
                    print(i)
                qu.append(self.win[i] / self.times[i]
                          + self.times[nowlocal] / self.times[i] * 0.4)  # the last number is c_puct
                if d:
                    print(self.times[i])
            if d:
                print(qu)
                print(len(qu))
            nowlocal = self.under[nowlocal][qu.index(max(qu))]
            if self.x[nowlocal] != 13:  # if this node isn't a pass
                board.play(self.x[nowlocal], self.y[nowlocal], self.color[nowlocal])
        self.expand(getf, nowlocal, board, (not (self.color[nowlocal] - 1)) + 1)  # expand
        while nowlocal != inl:  # backup starts here
            self.win[nowlocal] = 0
            self.times[nowlocal] = 0
            for i in self.under[nowlocal]:
                self.win[nowlocal] += self.win[i]
                self.times[nowlocal] += self.times[i]
            nowlocal = self.on[nowlocal]
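# getf above is only described by its comments: it takes (board, turn) and returns
# black's win points. A minimal sketch of such an evaluator based on a random
# rollout, assuming the board.ok / board.play / board.final API used elsewhere in
# this repo (an illustration, not the project's actual evaluator):
from copy import deepcopy
from random import randint


def random_rollout(b, turn, max_moves=200):
    sim = deepcopy(b)  # keep the search board untouched
    color = turn
    for _ in range(max_moves):
        for _ in range(30):  # try up to 30 random points, otherwise treat it as a pass
            j = randint(0, 168)
            if sim.ok(j // 13, j % 13, color):
                sim.play(j // 13, j % 13, color)
                break
        color = (not (color - 1)) + 1  # switch player
    return sim.final()  # black's final score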
def __init__(self):
    self.board = board()
async def start_tree_search(self, nownode, now_turn):
    # the returned value is always from black's point of view (see the comments at the end)
    now_expanding = self.now_expanding
    while nownode in now_expanding:
        await asyncio.sleep(1e-4)
    if not self.is_expanded(nownode):
        # expand this leaf
        self.now_expanding.add(nownode)  # start expanding
        situation = [[i == 1, i == 2, now_turn == 1, now_turn == 2]
                     for j in nownode.grid for i in j]
        future = await self.push_queue(situation)  # for now it is a plain list
        await future
        action_probs, value = future.result()
        action_probs = action_probs[0]
        value = value[0][0]
        if now_turn == 2:
            value = -value
        i = 0
        b = board(grid=nownode.grid)
        moves = []
        while i <= 169:  # including 169, pass
            if b.ok(i // 13, i % 13, now_turn):
                moves.append((i // 13, i % 13))
            i += 1
        del b
        nownode.expand(moves, action_probs, now_turn)
        self.expanded.add(nownode)  # nownode.state
        self.now_expanding.discard(nownode)
        # no need to invert here because value is already for black
        return value
    else:
        """The node has already been expanded. Enter the select phase."""
        # select the child node with the maximum action score
        # last_state = nownode.state
        action, nownode = nownode.select()
        # action_t = self.select_move_by_action_score(key, noise=True)
        # add virtual loss
        # self.virtual_loss_do(key, action_t)
        nownode.N += self.virtual_loss
        nownode.W -= self.virtual_loss
        # evolve the game board status
        # child_position = self.env_action(position, action_t)
        if (nownode.passes and nownode.parent.passes) or nownode.depth == 338:
            # 338 = 13 * 13 * 2, following the AlphaGo Zero paper, page 22
            self.board.grid = nownode.grid
            value = self.board.final() / 169.0
        else:
            # recursion: the opponent is to move at the selected child
            value = await self.start_tree_search(nownode, (not (now_turn - 1)) + 1)
        nownode.N -= self.virtual_loss
        nownode.W += self.virtual_loss
        # on the way back along the search path, update N, W, Q, U
        # self.back_up_value(key, action_t, value)
        if now_turn == 1:  # black played the move leading to this child
            nownode.backup(value)
        else:
            nownode.backup(-value)
        # from here on, nownode is the selected child
        # because of the recursion, every node on the path except the unexpanded leaf gets backed up
        # the value this function sees is always black's stone difference divided by 169.0
        # but when storing it (backup), white's value has to be negated
        return value
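# tree_search, push_queue, prediction_worker and is_expanded are used above and in
# main() but are not part of this excerpt. Together they implement the usual
# batched-prediction pattern: every search coroutine puts its position and an
# asyncio Future on self.queue, and a single worker drains the queue, evaluates the
# whole batch with the network and resolves the futures. A self-contained toy
# demonstration of that pattern (demo_batched_prediction and its fake "network" are
# illustrative assumptions, not this project's code):
import asyncio


async def demo_batched_prediction(num_requests=8):
    queue = asyncio.Queue()

    async def worker():
        served = 0
        while served < num_requests:
            items = [await queue.get()]
            while not queue.empty():  # drain everything that is already waiting
                items.append(queue.get_nowait())
            batch = [situation for situation, _ in items]
            results = [sum(situation) for situation in batch]  # stand-in for the network
            for (_, future), value in zip(items, results):
                future.set_result(value)
            served += len(items)

    async def searcher(i):
        future = asyncio.get_running_loop().create_future()
        await queue.put(([i, i + 1], future))  # the "situation" plus its future
        return await future  # wait until the worker has evaluated the batch

    results = await asyncio.gather(worker(), *[searcher(i) for i in range(num_requests)])
    return results[1:]  # drop the worker's return value


print(asyncio.run(demo_batched_prediction()))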