Exemple #1
0
    def move(self, color, vertex=None):
        """Play one move and report where it landed and what it captured.

        When ``vertex`` is given it is handed to ``go.move``; otherwise
        ``self.player.move()`` is asked for a move and a ``None`` result is
        treated as "no move played".

        Args:
            color: Not used by this method.
            vertex: Vertex to play, or ``None`` to let ``self.player`` choose.

        Returns:
            ``(go.toJI(go.POSITION.vertex), captured)`` on success, where
            ``captured`` is the set of ``go.toJI(v)`` for every ``v`` in
            ``go.get_take(go.POSITION)``; ``(None, {})`` otherwise.
        """
        if vertex is not None:
            accepted = go.move(vertex)
        else:
            vertex = self.player.move()
            accepted = vertex is not None

        if not accepted:
            # NOTE(review): this is an empty dict, whereas the success path
            # returns a set -- confirm callers only iterate / truth-test it.
            return None, {}

        captured = go.get_take(go.POSITION)
        played = go.toJI(go.POSITION.vertex)
        return played, {go.toJI(v) for v in captured}
Exemple #2
0
    def test(self, estimset):
        """Count how often the recorded move is among the network's top 10.

        For every position in ``estimset`` the parent position is encoded via
        ``self.input_board`` and run through ``self.resnet``; the output
        scores are ranked from highest to lowest and the rank of the move that
        was actually played is tallied.

        Args:
            estimset: Iterable of positions exposing ``vertex`` and ``parent``.

        Returns:
            A list of 10 ints; element ``i`` is the number of positions whose
            recorded move was the network's ``i``-th ranked output (0-based).
            Positions whose move is not in the top 10 are not counted.
        """
        top_k = 10
        rights = [0] * top_k
        use_cuda = torch.cuda.is_available()

        for pos in estimset:
            # Class index of the recorded move.  ``go.LN`` is used when
            # ``vertex`` is 0 (presumably a pass -- TODO confirm).
            target = go.LN
            if pos.vertex != 0:
                p, q = go.toJI(pos.vertex)
                target = q * go.N + p - go.N - 1

            # Presumably fills the module-level INPUT_BOARD read just below.
            self.input_board(pos.parent)

            # Pure inference: no autograd graph is needed.
            # NOTE(review): the network runs in whatever train/eval mode it is
            # currently in; if it contains batch-norm or dropout layers,
            # consider switching to eval() here -- confirm against the model.
            with torch.no_grad():
                x = Variable(INPUT_BOARD)
                if use_cuda:
                    x = x.cuda()
                y = self.resnet(x).data.cpu()

            # Output indices ordered from highest to lowest score.
            ranking = np.argsort(y[0].numpy())[::-1]

            for rank, candidate in enumerate(ranking[:top_k]):
                if candidate == target:
                    rights[rank] += 1
                    break

        return rights
Exemple #3
0
    def train(self, trainset, estimset, epoch):
        """Train ``self.resnet`` with SGD, evaluating and saving every epoch.

        Each epoch shuffles ``trainset`` in place and walks it in mini-batches
        of 10 positions (a trailing partial batch is dropped).  After the
        epoch it prints the mean batch loss together with the cumulative
        top-1..top-10 accuracy derived from ``self.test(estimset)``, then
        saves the network's ``state_dict`` to ``../data/goai_<epoch>.pth``.

        If ``trainset`` holds fewer than one full batch no optimisation step
        runs and the loss is reported as ``nan``; if ``estimset`` is empty the
        accuracies are reported as 0.0.

        Args:
            trainset: Mutable sequence of positions exposing ``vertex`` and
                ``parent``.  It is shuffled in place.
            estimset: Positions handed to ``self.test`` for evaluation.
            epoch: Number of passes over ``trainset``.
        """
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.SGD(self.resnet.parameters(),
                                   lr=0.001,
                                   momentum=0.9)
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.criterion = self.criterion.cuda()

        batch = 10
        n = len(trainset) // batch  # number of full batches per epoch

        for e in range(epoch):
            running_loss = 0.0
            random.shuffle(trainset)

            for i in range(n):
                target_data = torch.LongTensor(batch)
                input_data = torch.zeros(batch, 2, go.N, go.N)

                for j in range(batch):
                    pos = trainset[i * batch + j]

                    # Class index of the recorded move.  ``go.LN`` is used
                    # when ``vertex`` is 0 (presumably a pass -- TODO confirm).
                    v = go.LN
                    if pos.vertex != 0:
                        p, q = go.toJI(pos.vertex)
                        v = q * go.N + p - go.N - 1
                    target_data[j] = v

                    # Presumably fills the module-level INPUT_BOARD, whose
                    # first entry is copied into this batch slot.
                    self.input_board(pos.parent)
                    input_data[j].copy_(INPUT_BOARD[0])

                self.optimizer.zero_grad()

                x = Variable(input_data)
                t = Variable(target_data)
                if use_cuda:
                    x = x.cuda()
                    t = t.cuda()

                y = self.resnet(x)

                loss = self.criterion(y, t)
                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()

            # Turn the per-rank hit counts into cumulative top-k percentages.
            rights = self.test(estimset)
            total = len(estimset)
            right = 0
            parts = []
            for r, hits in enumerate(rights):
                right += hits
                # Guard against an empty evaluation set.
                percent = right / total * 100 if total else 0.0
                parts.append('%d:%.1f, ' % (r, percent))
            result = ''.join(parts)

            # Guard against a training set smaller than one batch.
            mean_loss = running_loss / n if n else float('nan')

            print('epoch: %d, loss %.3f, Est: %s' %
                  (e, mean_loss, result))
            torch.save(self.resnet.state_dict(), '../data/goai_%d.pth' % e)
Exemple #4
0
    def move(self):
        """Run a time- and count-limited Monte Carlo tree search and play the result.

        The search starts from the global ``go.POSITION``.  When possible the
        subtree kept from the previous call (``self.best_node``) is reused as
        the new root.  Simulations run until ``self.seconds_per_move`` has
        elapsed, 10000 simulations have been done, or ``select()`` returns
        ``None``.  The root child with the largest ``N`` is then played:
        ``go.POSITION`` is replaced by the resulting position, the chosen
        child is stored in ``self.best_node`` and a summary line is printed.

        Returns:
            ``True`` if a move was played, ``False`` if the root ended up with
            no children.
        """
        # Publish this player's policy through the module-level POLICY.
        global POLICY
        POLICY = self.policy
        root_node = None

        # Try to reuse the tree kept from the previous call.
        if self.best_node is not None:
            if self.best_node.position.next == go.POSITION.next:
                # Same ``next`` value as the global position: the kept node
                # can serve as root only if it also has the same vertex.
                # NOTE(review): when the vertex differs, self.best_node is
                # neither reused nor released here -- confirm this is intended.
                if self.best_node.position.vertex == go.POSITION.vertex:
                    root_node = self.best_node
                    self.best_node = None
            else:
                # Otherwise look one level down for the child whose vertex
                # matches the global position and release every other child.
                for node in self.best_node.children:
                    if node.position.vertex == go.POSITION.vertex:
                        root_node = node
                    else:
                        node.release()
                # Presumably ``release(False)`` frees the node itself without
                # touching its children -- confirm against MCTSNode.release.
                self.best_node.release(False)
                self.best_node = None

        if root_node is None:
            # Nothing reusable: start a fresh tree at the global position.
            root_node = MCTSNode(None, go.POSITION)
        elif root_node.position is not go.POSITION:
            # The reused node holds its own position object: swap it for the
            # global one and re-parent everything that pointed at the old one.
            root_node.position.release()
            root_node.position = go.POSITION
            for node in root_node.children:
                node.position.parent = go.POSITION
            for pos in root_node.positions:
                pos.parent = go.POSITION

        start = time.time()
        sim_count = 0
        while time.time() - start < self.seconds_per_move:
            # Selection.
            current_node = root_node.select()

            # select() returned nothing to explore: stop searching and pick
            # the best child directly.
            if current_node is None:
                print("Leaves is empty!")
                break

            # Expansion.
            current_node = current_node.expand()

            # Simulation.
            R = current_node.simulate()

            # Backpropagation: feed the result to every node up to the root.
            while current_node is not None:
                current_node.backpropagation(R)
                current_node = current_node.parent

            # Hard cap on the number of simulations, independent of the clock.
            sim_count += 1
            if sim_count >= 10000:
                break

        if len(root_node.children) > 0:
            # Pool size is sampled before the move below, for the debug line.
            poolsize = len(go.POSITION_POOL)
            # Pick the child with the largest N.
            self.best_node = max(root_node.children, key=lambda node: node.N)

            vertex = self.best_node.position.vertex

            # Play the chosen vertex on the global position.
            go.POSITION.reset_liberty()
            go.POSITION = go.POSITION.move(vertex)
            go.POSITION.update_group()

            self.debug_info = '%02d  ' % go.get_step()
            # Release every sibling of the chosen child; the chosen child is
            # kept in self.best_node for the next call.
            for node in root_node.children:
                # self.debug_info += '%d,' % node.N
                if node != self.best_node:
                    node.release()

            root_node.release(False)

            i, j = go.toJI(vertex)
            self.debug_info += 'V:[%d,%d]  POOL:%d  Q:%.1f  SIM:%d' % (
                i, j, poolsize, self.best_node.Q, sim_count)

            print(self.debug_info)

            return True
        else:
            # The root has no children: nothing to play.
            root_node.release()
            return False
Exemple #5
0
def train(trainset, evalset, epoch=1):
    """Build a small residual policy network in TensorFlow 1.x and train it.

    The graph is a 5x5 convolution, five ``Residual_block`` stages, a 1x1
    convolution down to 4 planes and a linear layer producing ``go.LN + 1``
    logits.  It is trained with plain gradient descent on a sparse softmax
    cross-entropy loss, in mini-batches of 10 positions (a trailing partial
    batch is dropped).  After every epoch the top-1 hit count on ``evalset``
    is printed and a checkpoint is written to ``./module/goai_tf-<epoch>``.

    If ``trainset`` holds fewer than one full batch no training step runs and
    the loss is reported as ``nan``.

    Args:
        trainset: Mutable sequence of positions exposing ``vertex`` and
            ``parent``.  It is shuffled in place every epoch.
        evalset: Iterable of positions used for evaluation; positions whose
            ``vertex`` is 0 are skipped but still counted in the printed total.
        epoch: Number of passes over ``trainset``.
    """
    board = tf.placeholder(tf.float32, [None, go.N, go.N, 1])
    labels = tf.placeholder(tf.int64, [None])

    num_planes = 32

    # The convolution kernels must be tf.Variables: a bare tf.random_uniform
    # tensor is re-sampled on every sess.run and is never updated by the
    # optimizer, so these layers could not learn anything.
    kernel = tf.Variable(
        tf.random_uniform([5, 5, 1, num_planes], minval=-0.2, maxval=0.2))
    conv0 = tf.nn.conv2d(board, kernel, [1, 1, 1, 1], 'SAME')
    conv1 = Residual_block(conv0, num_planes, num_planes)
    conv2 = Residual_block(conv1, num_planes, num_planes)
    conv3 = Residual_block(conv2, num_planes, num_planes)
    conv4 = Residual_block(conv3, num_planes, num_planes)
    conv5 = Residual_block(conv4, num_planes, num_planes)
    kernel2 = tf.Variable(
        tf.random_uniform([1, 1, num_planes, 4],
                          minval=-1. / num_planes,
                          maxval=1. / num_planes))
    conv6 = tf.nn.conv2d(conv5, kernel2, [1, 1, 1, 1], 'VALID')
    linear = tf.reshape(conv6, [-1, go.LN * 4])
    weight = tf.Variable(
        tf.random_uniform([go.LN * 4, go.LN + 1],
                          minval=-0.25 / go.LN,
                          maxval=0.25 / go.LN))
    predict = tf.matmul(linear, weight)

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=predict)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    opt = tf.train.GradientDescentOptimizer(0.001)
    train_step = opt.minimize(cross_entropy_mean)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver(max_to_keep=2)

        batch = 10
        n = len(trainset) // batch  # number of full batches per epoch

        for e in range(epoch):
            loss = 0.0

            random.shuffle(trainset)
            for i in range(n):
                # Labels are class indices, so build them with the dtype of
                # the int64 ``labels`` placeholder.
                y_data = np.zeros(batch, dtype=np.int64)
                x_data = np.zeros((batch, go.N, go.N, 1), dtype=np.float32)

                for j in range(batch):
                    pos = trainset[i * batch + j]

                    # Class index of the recorded move.  ``go.LN`` is used
                    # when ``vertex`` is 0 (presumably a pass -- TODO confirm).
                    v = go.LN
                    if pos.vertex != 0:
                        p, q = go.toJI(pos.vertex)
                        v = q * go.N + p - go.N - 1

                    y_data[j] = v
                    # Presumably encodes the parent position into the slice.
                    input_board(pos.parent, x_data[j])

                # Fetch the loss in the same run as the update instead of
                # paying for a second forward pass over the same batch.
                _, batch_loss = sess.run([train_step, cross_entropy_mean],
                                         feed_dict={
                                             labels: y_data,
                                             board: x_data
                                         })
                loss += batch_loss

            # Guard against a training set smaller than one batch.
            loss = loss / n if n else float('nan')

            right = 0
            for pos in evalset:
                if pos.vertex != 0:
                    x_data = np.zeros((1, go.N, go.N, 1), dtype=np.float32)
                    p, q = go.toJI(pos.vertex)
                    v = q * go.N + p - go.N - 1
                    input_board(pos.parent, x_data[0])
                    prediction = sess.run(predict, feed_dict={board: x_data})
                    # Indices ordered from highest to lowest logit.
                    sortedmoves = np.argsort(prediction[0])[::-1]
                    if v == sortedmoves[0]:
                        right += 1

            print("epoch: %d, loss: %f, right: %d / %d" %
                  (e, loss, right, len(evalset)))

            saver.save(sess, './module/goai_tf', global_step=e + 1)
Exemple #6
0
    # Collect evaluation positions from every file under ../data/estimate/
    # whose name ends in 'json'.
    evalset = []
    testfiles = [f for f in listdir('../data/estimate/') if f[-4:] == 'json']

    for f in testfiles:
        with open('../data/estimate/' + f) as json_data:
            record = json.load(json_data)
            s = 0
            # Each position is linked to the one before it; the first one
            # gets a freshly constructed go.Position() as its parent.
            parent = go.Position()
            # Read at most go.LN + 1 entries from each record.
            while s < len(record) and s <= go.LN:
                position = go.Position()
                position.fromJSON(record[s])
                position.parent = parent
                parent = position
                # Only positions with a non-zero vertex are evaluated.
                if position.vertex != 0:
                    evalset.append(position)
                s += 1

    # Count how often the top-scoring output equals the recorded move.
    # ``sess`` and ``predict`` are defined outside the visible fragment.
    right = 0
    for pos in evalset:
        # Always true here: evalset only holds non-zero-vertex positions.
        if pos.vertex != 0:
            # One board with a leading (1, 1) pair of axes.
            x_data = np.zeros(go.LN, dtype=np.float32).reshape(1, 1, go.N, go.N)
            p, q = go.toJI(pos.vertex)
            # Flatten the (p, q) coordinates into a class index.
            v = q * go.N + p - go.N - 1
            # Presumably encodes the parent position into the slice.
            input_board(pos.parent, x_data[0][0])
            # The input tensor is addressed by name ('import/0:0').
            prediction = sess.run(predict, feed_dict={'import/0:0': x_data})
            # Indices ordered from highest to lowest score.
            sortedmoves = np.argsort(prediction[0])[::-1]
            if v == sortedmoves[0]:
                right += 1
    print("EST: %d | %d" % (right, len(evalset)))