def move(self, color, vertex=None):
    """Play one move and report where it landed and what it captured.

    When ``vertex`` is None the move is delegated to ``self.player.move()``;
    otherwise ``go.move(vertex)`` is asked to play the given vertex.

    Args:
        color: Accepted for interface compatibility; not read by this method.
        vertex: Board vertex to play, or None to let ``self.player`` choose.

    Returns:
        On success, a pair ``(coords, captured)`` where ``coords`` is
        ``go.toJI`` of the current position's vertex and ``captured`` is a
        set of ``go.toJI`` results for everything in ``go.get_take``.
        On failure, ``(None, {})`` -- note the second item is an empty
        *dict*, not an empty set.
    """
    if vertex is None:
        # NOTE(review): only a None result counts as failure here; if the
        # player's move() reports failure with False instead, this branch
        # still treats the move as legal -- confirm against the player class.
        legal = self.player.move() is not None
    else:
        legal = go.move(vertex)

    if not legal:
        return None, {}

    captured = go.get_take(go.POSITION)
    return go.toJI(go.POSITION.vertex), {go.toJI(v) for v in captured}
def test(self, estimset):
    """Histogram the rank at which the network predicts each recorded move.

    For every position in ``estimset`` the parent position is encoded via
    ``self.input_board`` and run through ``self.resnet``; the outputs are
    sorted in descending order and the rank of the recorded move is looked
    up among the first ten entries.

    Args:
        estimset: Iterable of positions exposing ``vertex`` and ``parent``.

    Returns:
        A list of 10 ints; element ``i`` counts the positions whose recorded
        move was ranked ``i`` (0 = top prediction). Moves ranked outside the
        top ten are not counted anywhere.
    """
    rights = [0] * 10
    for pos in estimset:
        if pos.vertex == 0:
            # Vertex 0 maps to the extra class index go.LN
            # (presumably "pass" -- confirm against the go module).
            v = go.LN
        else:
            p, q = go.toJI(pos.vertex)
            # Same as (q - 1) * go.N + (p - 1): flat index of the move.
            v = q * go.N + p - go.N - 1

        # Presumably fills the module-level INPUT_BOARD tensor in place.
        self.input_board(pos.parent)
        if torch.cuda.is_available():
            y = self.resnet(Variable(INPUT_BOARD).cuda()).data.cpu()
        else:
            y = self.resnet(Variable(INPUT_BOARD)).data

        # Class indices ordered from highest to lowest network output.
        prediction = np.argsort(y[0].numpy())[::-1]
        for rank in range(10):
            if v == prediction[rank]:
                rights[rank] += 1
                break
    return rights
def train(self, trainset, estimset, epoch):
    """Train ``self.resnet`` with SGD and report top-k accuracy per epoch.

    Each epoch shuffles ``trainset`` in place, runs mini-batches of 10
    samples (a trailing partial batch is dropped), evaluates on ``estimset``
    through ``self.test`` and saves the weights to
    ``../data/goai_<epoch>.pth``.

    Empty inputs are tolerated: when ``trainset`` holds fewer than one full
    batch the reported mean loss is 0.0, and when ``estimset`` is empty all
    reported accuracies are 0.0 (previously both cases raised
    ``ZeroDivisionError``).

    Args:
        trainset: Mutable list of positions exposing ``vertex`` and
            ``parent``; shuffled in place.
        estimset: Positions passed to ``self.test`` for evaluation.
        epoch: Number of epochs to run.

    Side effects:
        Sets ``self.criterion`` and ``self.optimizer``, prints one summary
        line per epoch and writes one checkpoint file per epoch.
    """
    self.criterion = nn.CrossEntropyLoss()
    self.optimizer = optim.SGD(self.resnet.parameters(), lr=0.001, momentum=0.9)
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.criterion = self.criterion.cuda()

    batch = 10
    n = len(trainset) // batch  # number of full mini-batches per epoch

    for e in range(epoch):
        running_loss = 0.0
        random.shuffle(trainset)
        for i in range(n):
            target_data = torch.LongTensor(batch)
            input_data = torch.zeros(batch, 2, go.N, go.N)
            for j in range(batch):
                pos = trainset[i * batch + j]
                # Vertex 0 maps to the extra class index go.LN
                # (presumably "pass" -- confirm against the go module).
                v = go.LN
                if pos.vertex != 0:
                    p, q = go.toJI(pos.vertex)
                    # Same as (q - 1) * go.N + (p - 1).
                    v = q * go.N + p - go.N - 1
                target_data[j] = v
                # Presumably fills the module-level INPUT_BOARD in place;
                # its first entry is copied into this batch slot.
                self.input_board(pos.parent)
                input_data[j].copy_(INPUT_BOARD[0])

            self.optimizer.zero_grad()
            x = Variable(input_data)
            t = Variable(target_data)
            if use_cuda:
                x = x.cuda()
                t = t.cuda()
            loss = self.criterion(self.resnet(x), t)
            loss.backward()
            self.optimizer.step()
            running_loss += loss.item()

        rights = self.test(estimset)
        total = len(estimset)
        hits = 0
        parts = []
        for r in range(10):
            # Cumulative count: share of positions predicted within rank r.
            hits += rights[r]
            pct = hits / total * 100 if total else 0.0
            parts.append('%d:%.1f, ' % (r, pct))
        result = ''.join(parts)

        mean_loss = running_loss / n if n else 0.0
        print('epoch: %d, loss %.3f, Est: %s' % (e, mean_loss, result))
        torch.save(self.resnet.state_dict(), '../data/goai_%d.pth' % e)
def move(self):
    """Run a time-boxed Monte Carlo tree search and play the best move found.

    The search tree kept from the previous call (``self.best_node``) is
    reused when it matches the current ``go.POSITION``; otherwise a fresh
    root is created. The search stops when ``self.seconds_per_move`` has
    elapsed, when ``select()`` returns None, or after 10000 simulations.

    Side effects:
        Rebinds the module globals ``POLICY`` and ``go.POSITION``, updates
        ``self.best_node`` and ``self.debug_info``, and prints the debug
        line.

    Returns:
        True if a move was played, False if the root ended up with no
        children (in which case the root is released).
    """
    # Publish this player's policy through the module-level POLICY global
    # (presumably read by MCTSNode during expansion/simulation -- confirm).
    global POLICY
    POLICY = self.policy
    root_node = None
    # NOTE(review): the nesting of the if/else below was reconstructed from
    # a flattened source; confirm that the `else` pairs with the
    # `position.next` comparison and not with the `position.vertex` one.
    if self.best_node is not None:
        if self.best_node.position.next == go.POSITION.next:
            # Same `next` value as the stored node: reuse that node as the
            # root when it also records the same vertex.
            if self.best_node.position.vertex == go.POSITION.vertex:
                root_node = self.best_node
                self.best_node = None
        else:
            # `next` differs: look for the child whose vertex matches the
            # current position and keep only that subtree.
            for node in self.best_node.children:
                if node.position.vertex == go.POSITION.vertex:
                    root_node = node
                else:
                    node.release()
            # release(False) presumably frees only the node itself and not
            # its children -- TODO confirm against MCTSNode.release.
            self.best_node.release(False)
            self.best_node = None
    if root_node is None:
        root_node = MCTSNode(None, go.POSITION)
    elif root_node.position is not go.POSITION:
        # The reused root carries its own position object: swap in the live
        # go.POSITION and re-parent everything that pointed at the old one.
        root_node.position.release()
        root_node.position = go.POSITION
        for node in root_node.children:
            node.position.parent = go.POSITION
        for pos in root_node.positions:
            pos.parent = go.POSITION
    start = time.time()
    sim_count = 0
    while time.time() - start < self.seconds_per_move:
        # Selection.
        current_node = root_node.select()
        # select() returned nothing (original note: every child node is
        # already explored), so stop and pick the best child directly.
        if current_node is None:
            print("Leaves is empty!")
            break
        # Expansion.
        current_node = current_node.expand()
        # Simulation.
        R = current_node.simulate()
        # Backpropagation: feed the result to every node up to the root.
        while current_node is not None:
            current_node.backpropagation(R)
            current_node = current_node.parent
        sim_count += 1
        # Hard cap on simulations regardless of the time budget.
        if sim_count >= 10000:
            break
    if len(root_node.children) > 0:
        # Pool size is sampled before the move is played; it is only used
        # in the debug line below.
        poolsize = len(go.POSITION_POOL)
        # Pick the child with the largest N (presumably the visit count).
        self.best_node = max(root_node.children, key=lambda node: node.N)
        vertex = self.best_node.position.vertex
        go.POSITION.reset_liberty()
        go.POSITION = go.POSITION.move(vertex)
        go.POSITION.update_group()
        self.debug_info = '%02d ' % go.get_step()
        # Keep only the chosen subtree for the next call; release the rest.
        for node in root_node.children:
            # self.debug_info += '%d,' % node.N
            if node != self.best_node:
                node.release()
        root_node.release(False)
        i, j = go.toJI(vertex)
        self.debug_info += 'V:[%d,%d] POOL:%d Q:%.1f SIM:%d' % (
            i, j, poolsize, self.best_node.Q, sim_count)
        print(self.debug_info)
        return True
    else:
        root_node.release()
        return False
def train(trainset, evalset, epoch=1):
    """Build a small residual policy network in TensorFlow 1.x and train it.

    Graph: a 5x5 convolution to 32 planes, five ``Residual_block`` layers,
    a 1x1 convolution to 4 planes and a linear layer producing
    ``go.LN + 1`` logits, trained with sparse softmax cross-entropy and
    plain gradient descent (learning rate 0.001).

    Each epoch shuffles ``trainset`` in place and trains on mini-batches of
    10 samples (a trailing partial batch is dropped). It then counts how
    many ``evalset`` positions with a non-zero vertex have the recorded move
    as the top prediction, prints a summary line and saves a checkpoint to
    ``./module/goai_tf`` (the last two are kept).

    Changes from the previous version:
      * Both convolution kernels are now ``tf.Variable``s. As bare
        ``tf.random_uniform`` tensors they were re-sampled on every
        ``Session.run`` and were neither trained nor saved.
      * The loss is fetched in the same ``Session.run`` as the training
        step instead of a second forward pass, so the reported value is
        the loss the step was computed from.
      * A training set smaller than one batch reports a loss of 0.0
        instead of raising ``ZeroDivisionError``.

    Args:
        trainset: Mutable list of positions exposing ``vertex`` and
            ``parent``; shuffled in place.
        evalset: Positions used for the per-epoch top-1 evaluation.
        epoch: Number of epochs to run.
    """
    board = tf.placeholder(tf.float32, [None, go.N, go.N, 1])
    labels = tf.placeholder(tf.int64, [None])
    num_planes = 32

    # Must be a Variable: a bare random op yields new values on every run.
    kernel = tf.Variable(
        tf.random_uniform([5, 5, 1, num_planes], minval=-0.2, maxval=0.2))
    net = tf.nn.conv2d(board, kernel, [1, 1, 1, 1], 'SAME')
    for _ in range(5):
        net = Residual_block(net, num_planes, num_planes)

    kernel2 = tf.Variable(
        tf.random_uniform([1, 1, num_planes, 4],
                          minval=-1. / num_planes,
                          maxval=1. / num_planes))
    conv6 = tf.nn.conv2d(net, kernel2, [1, 1, 1, 1], 'VALID')
    linear = tf.reshape(conv6, [-1, go.LN * 4])
    weight = tf.Variable(
        tf.random_uniform([go.LN * 4, go.LN + 1],
                          minval=-0.25 / go.LN,
                          maxval=0.25 / go.LN))
    predict = tf.matmul(linear, weight)

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=predict)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    opt = tf.train.GradientDescentOptimizer(0.001)
    train_step = opt.minimize(cross_entropy_mean)

    batch = 10
    n = len(trainset) // batch  # number of full mini-batches per epoch

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=2)
        for e in range(epoch):
            loss = 0.0
            random.shuffle(trainset)
            for i in range(n):
                # int64 to match the `labels` placeholder.
                y_data = np.zeros(batch, dtype=np.int64)
                x_data = np.zeros((batch, go.N, go.N, 1), dtype=np.float32)
                for j in range(batch):
                    pos = trainset[i * batch + j]
                    # Vertex 0 maps to the extra class index go.LN
                    # (presumably "pass" -- confirm against the go module).
                    v = go.LN
                    if pos.vertex != 0:
                        p, q = go.toJI(pos.vertex)
                        # Same as (q - 1) * go.N + (p - 1).
                        v = q * go.N + p - go.N - 1
                    y_data[j] = v
                    # Presumably encodes the parent board into the slice
                    # in place -- confirm against input_board.
                    input_board(pos.parent, x_data[j])
                _, batch_loss = sess.run(
                    [train_step, cross_entropy_mean],
                    feed_dict={labels: y_data, board: x_data})
                loss += batch_loss
            loss = loss / n if n else 0.0

            right = 0
            for pos in evalset:
                if pos.vertex == 0:
                    continue
                x_data = np.zeros((1, go.N, go.N, 1), dtype=np.float32)
                p, q = go.toJI(pos.vertex)
                v = q * go.N + p - go.N - 1
                input_board(pos.parent, x_data[0])
                prediction = sess.run(predict, feed_dict={board: x_data})
                # Class indices ordered from highest to lowest logit.
                sortedmoves = np.argsort(prediction[0])[::-1]
                if v == sortedmoves[0]:
                    right += 1
            print("epoch: %d, loss: %f, right: %d / %d" %
                  (e, loss, right, len(evalset)))
            saver.save(sess, './module/goai_tf', global_step=e + 1)
# Build the evaluation set from every file in ../data/estimate/ whose name
# ends in "json". Each file is a recorded game; its entries are replayed
# into a parent-linked chain of go.Position objects.
evalset = []
testfiles = [name for name in listdir('../data/estimate/')
             if name.endswith('json')]
for f in testfiles:
    with open('../data/estimate/' + f) as json_data:
        record = json.load(json_data)
    parent = go.Position()
    # At most go.LN + 1 entries per record are consumed.
    for entry in record[:go.LN + 1]:
        position = go.Position()
        position.fromJSON(entry)
        position.parent = parent
        parent = position
        # Entries with vertex 0 stay in the chain but are not evaluated.
        if position.vertex != 0:
            evalset.append(position)

# Count positions where the network's top prediction is the recorded move.
right = 0
for pos in evalset:
    if pos.vertex == 0:
        continue
    x_data = np.zeros(go.LN, dtype=np.float32).reshape(1, 1, go.N, go.N)
    p, q = go.toJI(pos.vertex)
    # Same as (q - 1) * go.N + (p - 1): flat index of the recorded move.
    v = q * go.N + p - go.N - 1
    # Presumably encodes the parent board into the array slice in place.
    input_board(pos.parent, x_data[0][0])
    prediction = sess.run(predict, feed_dict={'import/0:0': x_data})
    # Class indices ordered from highest to lowest output.
    sortedmoves = np.argsort(prediction[0])[::-1]
    if v == sortedmoves[0]:
        right += 1

print("EST: %d | %d" % (right, len(evalset)))