Example #1
    def main(self, board, now_turn, playouts):
        node = self.root
        if not self.is_expanded(node):  # i.e. node.is_leaf()
            # print('Expanding root node...')
            situation = [[i == 1, i == 2, now_turn == 1, now_turn == 2]
                         for j in node.grid for i in j]
            features = np.asarray([situation], dtype=np.float32)
            input_fn = tf.estimator.inputs.numpy_input_fn(x=features,
                                                          shuffle=False)
            # Estimator.predict returns a generator of per-example prediction
            # dicts; unpacking like this assumes the model_fn exposes exactly
            # (action_probs, value)
            action_probs, value = self.estimator.predict(input_fn)
            if now_turn == 2:
                value = -value
            b = board(grid=node.grid)
            moves = []
            for i in range(170):  # actions 0..169; 169 is pass
                if b.ok(i // 13, i % 13, now_turn):
                    moves.append((i // 13, i % 13))
            del b
            node.expand(moves, action_probs, now_turn)
            self.expanded.add(node)
        now_turn = (not (now_turn - 1)) + 1  # flip player: 1 -> 2, 2 -> 1
        coroutine_list = []
        for _ in range(playouts):
            coroutine_list.append(self.start_tree_search(node, now_turn))
        coroutine_list.append(self.prediction_worker())
        self.loop.run_until_complete(asyncio.gather(*coroutine_list))
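The player flip `(not (now_turn - 1)) + 1` used throughout these listings relies on Python treating booleans as integers; a quick check of the mapping:

for c in (1, 2):
    print(c, '->', (not (c - 1)) + 1)  # prints: 1 -> 2, 2 -> 1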
Example #2
    def __init__(self, search_threads, estimator):
        self.root = node(x=None, y=None, color=2, depth=0)
        self.virtual_loss = 3
        self.now_expanding = set()
        self.expanded = set()
        self.sem = asyncio.Semaphore(search_threads)
        self.queue = Queue(search_threads)  # prediction queue
        self.loop = asyncio.get_event_loop()
        self.running_simulation_num = 0
        self.board = board()
        self.estimator = estimator
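`push_queue` and `prediction_worker` are referenced in Examples #1 and #11 but never shown in this listing. A minimal sketch of the usual pairing, assuming `self.queue` is an `asyncio.Queue` and using a hypothetical `predict_batch` helper around the estimator:

    async def push_queue(self, situation):
        # pair the queued position with a future that the searcher awaits
        future = self.loop.create_future()
        await self.queue.put((situation, future))
        return future

    async def prediction_worker(self):
        # batch whatever is waiting in the queue and answer each future;
        # the real stopping condition is not shown in this listing
        while self.running_simulation_num > 0 or not self.queue.empty():
            if self.queue.empty():
                await asyncio.sleep(1e-3)
                continue
            items = [self.queue.get_nowait() for _ in range(self.queue.qsize())]
            probs, values = self.predict_batch([s for s, _ in items])  # hypothetical helper
            for (_, future), p, v in zip(items, probs, values):
                future.set_result((p, v))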
Example #3
    def expand(self, moves, P, color):
        total = 1e-8  # guard against dividing by zero when renormalizing
        b = board(grid=self.grid)
        P = P.flatten()  # P is an array from TensorFlow
        for x, y in moves:
            # copy the grid so each child starts from this node's position
            b.grid = [row[:] for row in self.grid]
            if 0 <= x < 13 and 0 <= y < 13:
                b.play(x, y, color)  # (13, 0), i.e. action 169, is pass
            p = P[x * 13 + y]
            newnode = node(self, x, y, color, p, b.grid, depth=self.depth + 1)
            self.child[(x, y)] = newnode
            total += p
        # renormalize the priors over the legal moves only
        for i in self.child.values():
            i.p /= total
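The loop above keeps only the prior mass on legal moves and renormalizes it; `total` starts at 1e-8 so the division stays safe even if every legal move has zero prior. The same masking can be written directly with NumPy (a sketch, not the author's code):

import numpy as np

def mask_and_renormalize(P, moves):
    # keep the prior for each legal move, then rescale to sum to 1
    P = np.asarray(P).flatten()
    priors = np.array([P[x * 13 + y] for x, y in moves])
    return priors / (priors.sum() + 1e-8)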
Example #4
    def __init__(self,
                 parent=None,
                 x=None,
                 y=None,
                 color=2,
                 p=0.0,
                 grid=None,
                 depth=0):  # action 169 is pass
        global c_puct
        self.grid = grid  # the position after this node's move has been played
        self.depth = depth
        self.parent = parent
        self.p = p  # prior probability from the policy network
        self.w = 0  # total value
        self.n = 0  # visit count
        self.q = 0  # mean value, w / n
        self.child = {}  # {action (x, y): child node, ...}
        self.x = x
        self.y = y
        self.color = color
        self.update_u(c_puct)
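`update_u` itself is not shown anywhere in this listing. In AlphaGo-Zero-style PUCT it usually computes U = c_puct * p * sqrt(N_parent) / (1 + n); the following is only a sketch of that standard form under the attribute names above, not the author's implementation:

from math import sqrt

def update_u(self, c_puct):  # method sketch; belongs on the node class
    parent_n = self.parent.n if self.parent is not None else 0
    self.u = c_puct * self.p * sqrt(parent_n) / (1 + self.n)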
Example #5
from random import randint
from rule import board


def makegame(num):
    myfile = open("random.mg", "w")
    i = 0
    while i < num:
        if i % 100 == 0:
            print(i * 100 // num, '%')  # rough progress
        b = board(6.5)
        count = 0
        passes = 0
        color = 2
        while count < 300:
            color = (not (color - 1)) + 1  # flip player: 1 -> 2, 2 -> 1
            notok = []
            j = randint(0, 169)  # randint is inclusive; 169 is pass
            while j in notok or not b.ok(j // 13, j % 13, color):
                notok.append(j)
                j = randint(0, 169)
            b.play(j // 13, j % 13, color)
            myfile.write(str(j) + ' ')
            count += 1
        i += 1
        score = b.final()
        myfile.write(str(score) + '\n')
    myfile.close()
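Moves here are encoded as a single integer j in [0, 169]: j // 13 is the row, j % 13 is the column, and 169 (row 13) stands for pass. A pair of helpers makes the convention explicit:

def encode_move(x, y):
    return x * 13 + y  # 169 == encode_move(13, 0) is reserved for pass

def decode_move(j):
    return j // 13, j % 13  # decode_move(169) == (13, 0), i.e. pass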
Example #6
    def __init__(self,
                 playout=1600,
                 in_batch_size=512,
                 exploration=True,
                 in_search_threads=16,
                 processor="gpu"):
        self.epochs = 5
        self.playout_counts = playout  # 400 / 800 / 1600
        # self.temperature = 1    # 1e-8, 1e-3
        # I don't want to use temperature: as it approaches zero, simply
        # picking the move with the largest N is good enough.
        self.batch_size = in_batch_size  # 128 / 512
        self.start_steps = 30  # while the temperature is 1
        self.start_temperature = 1  # 2
        # self.Dirichlet = 0.3    # P(s,a) = (1 - ϵ)p_a + ϵη_a    # self-play section of the paper
        '''
        self.eta = 0.03
        # self.epsilon = 0.25
        # self.v_resign = 0.05
        # self.c_puct = 5
        self.learning_rate = 0.001    # 5e-3, 0.001
        self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
        '''
        self.buffer_size = 10000
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.game_board = board()
        if is_using_TPU:  # module-level flag, not shown in this listing
            self.policy_value_network = tf.estimator.tpu.TPUEstimator(
                model_fn=resnet_fn_tpu, model_dir="MGmodels")
        else:
            self.policy_value_network = tf.estimator.Estimator(
                model_fn=resnet_fn,
                model_dir="/home/kenny/Desktop/python3/MG/MGmodels")
        self.search_threads = in_search_threads
        self.mcts = mcts_tree(self.search_threads,
                              self.policy_value_network)
        self.exploration = exploration
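The commented-out temperature settings above, together with the translated note ("near zero, just pick the move with the largest N"), suggest move selection like the following sketch: sample proportionally to visit counts for the first `start_steps` moves, then play greedily. The names here are illustrative, not from the listing:

import numpy as np

def pick_move(moves, visit_counts, step, start_steps=30):
    # early game: sample ~ N (temperature 1); later: argmax N (temperature -> 0)
    counts = np.asarray(visit_counts, dtype=np.float64)
    if step < start_steps:
        probs = counts / counts.sum()
        return moves[np.random.choice(len(moves), p=probs)]
    return moves[int(counts.argmax())]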
Example #7
from player_random import player as randomplayer
from player_mcts import player as mctsplayer
from player_mcts import node, mcts_tree
from rule import board

root = node()
t = mcts_tree(root)
mcts = mctsplayer(t)

random = randomplayer()

b = board()


def agame():
    while True:
        y = mcts.genmove(1)
        if y is None:
            return b.win()
        b.play(y, 1)
        b.dump()
        if b.win() != 0:
            return b.win()
        random.play(y, 1)  # tell the random player about the MCTS move

        y = random.genmove(2)
        if y is None:
            return b.win()
        b.play(y, 2)
        b.dump()
        if b.win() != 0:
            return b.win()
        mcts.play(y, 2)  # tell the MCTS player about the random move
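Assuming `b.win()` returns the winning color (an assumption; the listing never shows its return values), a small driver can tally results over several games:

wins = {}
for _ in range(10):
    winner = agame()  # note: b, mcts and random keep state across games,
                      # so a real driver would reset them between games
    wins[winner] = wins.get(winner, 0) + 1
print(wins)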
Example #8
from rule import board
import numpy as np
import randomgame
from copy import deepcopy as dc
from math import ceil

randomgame.makegame(1000)
myfile = open("random.mg", "r")
a = myfile.readlines()
out = []
results = []
for i in a:
    out.append([int(j) for j in i.split()[0:-1]])
    results.append(float(i.split()[-1]))
print(len(results))
b = board(6.5)
abatch = 1000
index = 0
batches = int(ceil(len(out) / float(abatch)))
for now in range(batches):
    indata = []
    resultdata = []
    chancedata = []
    batch = out[now * abatch:now * abatch + abatch]
    for agame in batch:
        print(index)
        index += 1
        b.grid = [[0] * 13 for _ in range(13)]
        color = 1
        for coor in agame:  # coordinate, encoded as x * 13 + y
            b.play(coor // 13, coor % 13, color)
            color = (not (color - 1)) + 1  # alternate players each move
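Examples #1 and #11 encode a position as four boolean features per point: (is black, is white, black to move, white to move). After a replay like the one above, the same encoding can be produced from `b.grid` (a sketch mirroring those listings):

def grid_to_features(grid, now_turn):
    # one 4-feature row per board point, flattened row-major,
    # exactly as in Examples #1 and #11
    return [[i == 1, i == 2, now_turn == 1, now_turn == 2]
            for row in grid for i in row]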
Example #9
class tree:
    # NOTE: these are class-level attributes, shared by every instance;
    # the original code uses the class as a single global tree.
    under = []  # child-node index lists
    on = []  # parent-node index
    used = []  # slot-in-use flags; freed slots are kept so they can be reused
    x = []
    y = []
    color = []
    win = []  # black's win points, summed over child nodes (not averaged)
    times = []  # visit counts
    nowboard = board(6.5)  # not used for searching

    def appendnode(self):
        self.x.append(None)
        self.y.append(None)
        self.color.append(None)
        self.on.append(None)
        self.under.append([])
        self.used.append(0)
        self.win.append(None)
        self.times.append(0)

    def expand(self, getf, nowlocal, board, turn):
        # Expand a leaf node. nowlocal is the index of the current position
        # in the lists; getf is a function returning black's win points.
        for x in range(13):
            for y in range(13):
                if board.ok(x, y, turn):
                    i = 0
                    while self.used[i]:  # find a free slot to reuse
                        if i == len(self.under) - 1:
                            self.appendnode()
                        i += 1
                    self.x[i] = x
                    self.y[i] = y
                    self.color[i] = turn
                    self.on[i] = nowlocal
                    self.under[nowlocal].append(i)
                    self.under[i] = []
                    self.used[i] = 1
                    self.win[i] = getf(board, turn) - board.komi
                    self.times[i] = 1
        # also create the pass node, encoded as (13, 13)
        i = 0
        while self.used[i]:  # find a free slot to reuse
            if i == len(self.under) - 1:
                self.appendnode()
            i += 1
        self.x[i] = 13
        self.y[i] = 13
        self.color[i] = None
        self.on[i] = nowlocal
        self.under[nowlocal].append(i)
        self.under[i] = []
        self.used[i] = 1
        self.win[i] = getf(board, (not (turn - 1)) + 1) - board.komi
        self.times[i] = 1  # fresh node starts with one evaluation

    def search(self,
               getf,
               board,
               inl,
               d=0):  # inl is the input local, i.e. the starting list index
        nowlocal = inl
        while self.under[nowlocal] != []:
            qu = []  # Q + U score for each child
            for i in self.under[nowlocal]:
                if self.times[i] == 0:
                    print(i)
                qu.append(self.win[i] / self.times[i] + self.times[nowlocal] /
                          self.times[i] * 0.4)  # the constant 0.4 acts as c_puct
                if d:
                    print(self.times[i])
            if d:
                print(qu)
                print(len(qu))
            nowlocal = self.under[nowlocal][qu.index(max(qu))]
            if self.x[nowlocal] != 13:  # skip playing if this node is a pass
                board.play(self.x[nowlocal], self.y[nowlocal],
                           self.color[nowlocal])
        self.expand(getf, nowlocal, board,
                    (not (self.color[nowlocal] - 1)) + 1)  # expand the leaf
        while nowlocal != inl:
            # backup: recompute win/visit sums along the search path
            self.win[nowlocal] = 0
            self.times[nowlocal] = 0
            for i in self.under[nowlocal]:
                self.win[nowlocal] += self.win[i]
                self.times[nowlocal] += self.times[i]
            nowlocal = self.on[nowlocal]
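The selection score in `search` is a win-rate term plus an exploration term scaled by 0.4 in place of c_puct. Written out for one child i under parent n (same arithmetic as above):

def child_score(win_i, times_i, times_parent, c_puct=0.4):
    # exploitation (average win so far) + exploration (favors less-visited children)
    return win_i / times_i + times_parent / times_i * c_puct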
Example #10
    def __init__(self):
        self.board = board()
Example #11
    async def start_tree_search(self, nownode, now_turn):
        # the returned value is V for the player to move
        now_expanding = self.now_expanding

        while nownode in now_expanding:
            await asyncio.sleep(1e-4)

        if not self.is_expanded(nownode):
            # mark this node as being expanded
            self.now_expanding.add(nownode)
            # start expanding
            situation = [[i == 1, i == 2, now_turn == 1, now_turn == 2]
                         for j in nownode.grid for i in j]
            future = await self.push_queue(situation)  # situation is a plain list here
            await future
            action_probs, value = future.result()
            action_probs = action_probs[0]
            value = value[0][0]
            if now_turn == 2:
                value = -value
            b = board(grid=nownode.grid)
            moves = []
            for i in range(170):  # actions 0..169; 169 is pass
                if b.ok(i // 13, i % 13, now_turn):
                    moves.append((i // 13, i % 13))
            del b
            nownode.expand(moves, action_probs, now_turn)
            self.expanded.add(nownode)

            self.now_expanding.discard(nownode)

            # no need to invert here: value is always from black's view
            return value

        else:
            """The node has already been expanded. Enter the select phase."""
            # select the child with the maximum action score
            action, nownode = nownode.select()

            # add virtual loss so concurrent searches avoid this child
            nownode.N += self.virtual_loss
            nownode.W -= self.virtual_loss

            # evolve the game board status
            if (
                    nownode.passes and nownode.parent.passes
            ) or nownode.depth == 338:  # 338 = 13*13*2, per the AlphaGo Zero paper, page 22
                self.board.grid = nownode.grid
                value = self.board.final() / 169.0
            else:
                value = await self.start_tree_search(nownode, now_turn)  # recursion
            nownode.N -= self.virtual_loss
            nownode.W += self.virtual_loss

            # on the way back up the search path, update N, W, Q, U
            if now_turn == 1:
                # black
                nownode.backup(value)
            else:
                nownode.backup(-value)
            # up to this point, nownode is the selected child
            # because of the recursion, every node except unexpanded ones is backed up
            # the value this function sees is always black's score margin / 169.0
            # but when storing (backup) for white, the value's sign must be flipped
            return value
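The N/W adjustments around the recursive call implement virtual loss: while one coroutine is still evaluating a child, the others see a temporarily worse score for it and spread out across the tree. Isolated as a pattern (with `self.virtual_loss` = 3 from Example #2):

# before descending, make the child look worse to concurrent coroutines
nownode.N += self.virtual_loss
nownode.W -= self.virtual_loss
value = await self.start_tree_search(nownode, now_turn)
# after returning, remove the temporary penalty before the real backup
nownode.N -= self.virtual_loss
nownode.W += self.virtual_loss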