Esempio n. 1
0
def gtp_io():
    """Run a minimal GTP (Go Text Protocol) command loop on stdin/stdout.

    Commands are read line by line until EOF or ``quit``. An optional leading
    numeric id is echoed back in the response. Successful commands are
    answered with ``=<id> <result>`` and failures with ``?<id> ???``, each
    followed by a blank line; stdout is flushed after every response.

    The loop operates on the module-level board ``go``; ``clear_board``
    replaces it with a fresh ``board.Go()``. Moves are generated by
    ``make_prediction()``. Diagnostics go to stderr through ``debug_print``.
    """
    global go
    # Set when the opponent's last move was a pass; the next genmove then
    # answers with a pass as well.
    opponent_passed = False
    known_commands = [
        'boardsize', 'clear_board', 'komi', 'play', 'genmove', 'quit', 'name',
        'version', 'known_command', 'list_commands', 'protocol_version'
    ]
    while True:
        try:
            line = raw_input().strip()
        except EOFError:
            break

        if line == '':
            continue
        command = [s.lower() for s in line.split()]
        if re.match(r'\d+', command[0]):
            cmdid = command[0]
            command = command[1:]
        else:
            cmdid = ''
        if not command:
            # A bare id without a command name: report failure instead of
            # crashing on command[0] below.
            sys.stdout.write('?%s ???\n\n' % (cmdid, ))
            sys.stdout.flush()
            continue

        name = command[0]
        ret = ''

        if name == 'boardsize':
            size = command[1] if len(command) > 1 else ''
            # Only 19x19 is supported; accept that size and reject the rest.
            if not (size.isdigit() and int(size) == 19):
                debug_print(
                    "Warning: Trying to set incompatible boardsize %s (!= %d)"
                    % (size, 19))
                ret = None
        elif name == 'clear_board':
            # Reset the global position (not the `board` module itself).
            go = board.Go()
            opponent_passed = False
        elif name == 'komi':
            pass
        elif name == 'play':
            if len(command) < 3:
                debug_print("Malformed play command: %s" % line)
                ret = None
            elif command[2] == 'pass':
                # -1 is the move number used for a pass throughout this loop.
                go.place_stone_num(-1)
                opponent_passed = True
            elif command[1] in ('b', 'w', 'black', 'white'):
                go.place_stone_num(util.gtppos_to_num(command[2].upper()))
                opponent_passed = False
            else:
                debug_print("Unknown colour in play command: %s" % line)
                ret = None
        elif name == 'genmove':
            if opponent_passed:
                opponent_passed = False
                move = -1
                ret = 'pass'
            else:
                move = make_prediction()
                if move is None:
                    # No move available: answer with a pass and record it
                    # with the same -1 marker used for passes above.
                    move = -1
                    ret = 'pass'
                elif move == -1:
                    ret = 'resign'
                else:
                    ret = util.pos_to_gtppos(util.num_to_pos(move))
            go.place_stone_num(move)
        elif name == 'name':
            ret = 'PikachuP, 2018'
        elif name == 'version':
            ret = '0.2'
        elif name == 'known_command':
            if len(command) > 1 and command[1] in known_commands:
                ret = 'true'
            else:
                ret = 'false'
        elif name == 'list_commands':
            ret = '\n'.join(known_commands)
        elif name == 'protocol_version':
            ret = '2'
        elif name == 'quit':
            sys.stdout.write('=%s \n\n' % (cmdid, ))
            sys.stdout.flush()
            sys.exit(0)
        else:
            debug_print("Unknown Command! ")
            ret = None

        if ret is not None:
            sys.stdout.write('=%s %s\n\n' % (cmdid, ret))
        else:
            sys.stdout.write('?%s ???\n\n' % (cmdid, ))
        sys.stdout.flush()
Esempio n. 2
0
    def load_file(self):
        while True:
            filename_train = ''
            filename_label = ''
            # 获取需要载入的文件名
            if self.is_train:
                index = self.train_list[self.train_index]
                my_print('[pl %s' % str(index).rjust(4))
                filename_train = config.train_prefix + str(index) + '.npy'
                filename_label = config.label_prefix + str(index) + '.npy'
            else:
                filename_train = config.train_prefix + 'val.npy'
                filename_label = config.label_prefix + 'val.npy'
                my_print('[pl validate data')

            # 生成一个新的棋局

            go = board.Go()

            for i in range(300):
                feature = go.generate()
                iter = mx.io.NDArrayIter(data=feature)
                res = self.module.predict(iter).asnumpy()
                print res

            training_data = np.load(filename_train)
            training_data = training_data.reshape(-1, 1, 361)
            exp = exp2.T
            exp = exp.reshape(1, input_filters, 1)
            training_data = ((np.bitwise_and(training_data, exp) > 0) +
                             0).reshape(-1, input_filters, 19, 19)
            label = np.load(filename_label)
            NUM = training_data.shape[0]
            label_data_ = np.zeros((NUM, 361))
            for i in range(NUM):
                label_data_[i][label[i][0]] = 1
            label_data_ = label_data_.reshape(-1, 19, 19)
            if self.is_train and config.apply_symmetry:
                symmetry.apply_random_symmetry(training_data, label_data_)

            label_data = label_data_.reshape(-1, 361)
            label_data = np.argmax(label_data, axis=1)
            label = None
            label_data_ = None

            # 表示加载完成
            my_print(']')

            if self.is_train:
                self.queue.put(obj=[training_data, label_data],
                               block=True,
                               timeout=None)
                self.train_index = self.train_index + 1
                # 如果已经完成全部文件的训练,那么就重新打散文件的顺序。
                if self.train_index >= len(self.train_list):
                    self.train_index = 0
                    random.shuffle(self.train_list)
            else:  # 如果是测试数据,那么一次载入
                self.data_list = [mx.ndarray.array(training_data, config.data_device), \
                                      mx.ndarray.array(label_data, config.data_device)]
            gc.collect()  # 要求垃圾回收

            if not self.is_train:
                return
                # 停下来等待信号
            if self.is_train:
                self.can_load_file.wait()
                self.can_load_file.clear()
Esempio n. 3
0
import zobrist
import time
import math
import random
import go_plot
import config

# Search parameters, read once from the project configuration module.
ROLLOUTS = config.search_times_ucb
DEPTH = config.search_depth_ucb
UCB_C = config.para_c_ucb
POLICY_WEIGHT = config.policy_weight

# Current global board position `go`. To access it inside a function,
# declare it with `global`.
go = board.Go()

# For debugging, uncomment the two lines below.
# np.set_printoptions(threshold='nan')
# np.set_printoptions(precision=2, suppress=True)

# debug_print is used for human/computer interaction on the console.


def debug_print(args):
    """Print ``args`` on standard error and flush the stream right away.

    Keeps diagnostic output off stdout and makes it visible immediately.
    """
    err = sys.stderr
    print >> err, args
    err.flush()
Esempio n. 4
0
def self_play():
    global module
    label_black = []
    label_white = []

    t0 = time.clock()

    black = None
    white = None

    TOTAL = 350
    NUM = 300
    go = board.Go()
    for i in range(TOTAL):
        # 生成特征
        if i % 10 == 0:
            print i,
        feature = go.generate()
        # 把特征喂入神经网络,获得预测
        iter = mx.io.NDArrayIter(data=feature)
        pred = module.predict(iter).asnumpy()
        # 将不可入点扔掉
        pred = feature[0][2].reshape(1, 361) * pred * 10
        # 排个序
        out = np.argsort(-pred)
        predsort = -np.sort(-pred)
        # print predsort[0: K]
        # 看看前K个的值
        process = out[0][0:K]
        idx = random_pick(predsort[0][0:K].reshape(-1))

        if i == 0:
            black = feature.reshape(-1, 16, 19, 19)
            label_black.append(out[0][idx])
            go.place_stone_num(out[0][idx])
        elif i == 1:
            white = feature.reshape(-1, 16, 19, 19)
            label_white.append(out[0][idx])
            go.place_stone_num(out[0][idx])
        elif i < NUM:
            f = feature.reshape(-1, 16, 19, 19)
            if i % 2 == 0:
                black = np.vstack((black, f))
                label_black.append(out[0][idx])
            else:
                white = np.vstack((white, f))
                label_white.append(out[0][idx])
            go.place_stone_num(out[0][idx])
        else:
            go.place_stone_num(out[0][0])

    go = None
    go = board.Go()

    # print len(label_black)
    for i in range(TOTAL):
        # 生成特征
        if i % 10 == 0:
            print i,
        feature = go.generate()
        # 把特征喂入神经网络,获得预测
        iter = mx.io.NDArrayIter(data=feature)
        pred = module.predict(iter).asnumpy()
        # 将不可入点扔掉
        pred = feature[0][2].reshape(1, 361) * pred * 10
        # 排个序
        out = np.argsort(-pred)
        predsort = -np.sort(-pred)
        # print predsort[0: K]
        # 看看前K个的值
        process = out[0][0:K]
        idx = random_pick(predsort[0][0:K].reshape(-1))

        if i < NUM:
            f = feature.reshape(-1, 16, 19, 19)
            if i % 2 == 0:
                black = np.vstack((black, f))
                label_black.append(out[0][idx])
            else:
                white = np.vstack((white, f))
                label_white.append(out[0][idx])
            go.place_stone_num(out[0][idx])
        else:
            go.place_stone_num(out[0][0])
    print
    cnt = 0
    for x in label_black:

        print x,
        cnt += 1
        if cnt == 150:
            print

    print label_white

    BLACK_LABEL = np.zeros((300, 361), dtype=np.int16)
    WHITE_LABEL = np.zeros((300, 361), dtype=np.int16)
    for i in range(600):
        if i % 2 == 0:  # 偶数,黑棋盘面
            # print "BLACK", i
            BLACK_LABEL[i // 2][label_black[i // 2]] = 1
        else:
            # print "WHITE", i
            WHITE_LABEL[(i - 1) // 2][label_white[(i - 1) // 2]] = 1

    permutation = np.random.permutation(black.shape[0])
    black = black[permutation, :]
    white = white[permutation, :]

    BLACK_LABEL = BLACK_LABEL.reshape(-1, 19, 19)
    WHITE_LABEL = WHITE_LABEL.reshape(-1, 19, 19)
    BLACK_LABEL = BLACK_LABEL.reshape(-1, 361)
    WHITE_LABEL = WHITE_LABEL.reshape(-1, 361)
    BLACK_LABEL = BLACK_LABEL[permutation, :]
    WHITE_LABEL = WHITE_LABEL[permutation, :]
    BLACK_LABEL = np.argmax(BLACK_LABEL, axis=1)
    WHITE_LABEL = np.argmax(WHITE_LABEL, axis=1)

    print BLACK_LABEL
    print WHITE_LABEL
    """
    # 判断胜负, 黑胜返回1,白胜利返回0
    def evaluate(self):

    """
    # go_plot.go_plot(terminal // 50)
    result = go.evaluate()

    if result == 1:
        print("B+")
    else:
        print("W+")

    print "generate_used", time.clock() - t0

    BLACK = black
    WHITE = white

    # print self.BLACK
    # print self.WHITE
    """
    0代表黑胜利,1代表白胜利
    """
    if result == 1:
        winner = 1
    else:
        winner = 0
    go = None
    label_black = None
    label_white = None

    return result