Example 1
 def thread_test(self):
     print("Starting self-test ...")
     if not self.net:
         self.net = policy.PolicyNetwork(is_train=True, selftrain=True)
     old_net = policy.PolicyNetwork(is_train=False, use_old=True)
     config.maxelo = old_net.elo + 30 + 20000 / (6000 + self.net.elo)
     st = time.time()
     win = 0
     i = 0
     self.running = True
     while config.running and self.running:
         result = test_play(self.net, old_net, qp=self.qp,
                            bw=go.BLACK if i % 2 == 0 else go.WHITE)
         win += 1 if result > 0 else 0
         i += 1
         wr = win * 100.0 / i
         # go.result_str() names the winner Black/White ("黑"/"白");
         # relabel as new/old weights for the log.
         print("Game {}: {}, win rate: {:.2f}, {:.1f} games/min.".format(
             i, go.result_str(result).replace("黑", "new weights").replace("白", "old weights"),
             wr, 60 / (time.time() - st)), file=sys.stderr)
         st = time.time()
         time.sleep(0.1)
         # Promotion margin: larger for weak nets, shrinking as Elo grows.
         up_elo = 30 + 20000 / (6000 + self.net.elo)
         if self.net.elo > config.maxelo + up_elo:
             self.net.save_variables()
             old_net.restore()
             self.net.save(old_net.save_file)
             config.maxelo = self.net.elo
             i = 0
             win = 0
         if i > 400:
             i = 0
             win = 0
     self.running = False
     print("Exiting self-test.")
Example 2
 def run(self):
     print("Starting thread: " + self.name, file=sys.stderr)
     config.running = True
     # Thread names containing '测试' ("test") run an evaluation match
     # between the current and old weights instead of self-training.
     if '测试' in self.name:
         net = policy.PolicyNetwork(is_train=True, use_old=False)
         old_net = policy.PolicyNetwork(is_train=False, use_old=True)
         for i in range(config.test_num):
             test_play(net, old_net, qp=self.qp)
         net.save_variables()
         oldfile = old_net.save_file
         del old_net
         net.save(oldfile)
     else:
         self.self_train = SelfTrain(qp=self.qp, sgfdir=self.sgfdir, net=self.train_net)
         self.self_train.train_db(False)
     config.running = False
     print("Exiting thread: " + self.name, file=sys.stderr)
Example 3
 def train_loop(self):
     test = False
     while config.running:
         if not self.net:
             self.net = policy.PolicyNetwork(is_train=True, selftrain=True)
             self.net.game_num += 2560
         self.thread_play()
         print(time.strftime('%m-%d %H:%M:%S'), ": starting training ...")
         self.train(test)
         # Skip the test pass only on the first iteration.
         if not test:
             test = True
     print("Self-training finished.")
Example 4
 def train_db(self, test=False):
     config.running = True
     self.running = True
     if not self.net:
         self.net = policy.PolicyNetwork(is_train=True, selftrain=True)
     if test:
         # Evaluate in a background daemon thread while training runs.
         thtest = threading.Thread(target=self.thread_test)
         thtest.daemon = True
         thtest.start()
     self.datas.loaddb()
     self.net.train(self.datas)
     self.datas.clear()
     self.datas.data_files = []
     self.running = False
Example 5
 def thread_play(self):
     if not self.net:
         self.net = policy.PolicyNetwork(is_train=True, selftrain=True)
     game_num = self.net.game_num
     while config.running:
         train_start = time.time()
         # Very weak nets always play games out to the end; otherwise
         # passing is forbidden in roughly 5% of games.
         forbid_pass = self.net.elo < -1000
         if not forbid_pass and random.randint(1, 100) > 95:
             forbid_pass = True
         pos = self.play(forbid_pass=forbid_pass)
         result = pos.result
         print('%s(%.1f): self-play game %d finished, %s wins by %.1f stones.' % (
             time.strftime('%m-%d %H:%M:%S'), time.time() - train_start,
             self.net.game_num - game_num + 1,
             go.get_color_str(result), abs(result)))
         self.net.game_num += 1
         if self.net.game_num - game_num > 1000:
             break
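The pass-forbidding rule above mixes a hard Elo cutoff with a ~5% random chance. Restated as a standalone helper with the same thresholds (the function name is hypothetical):

import random

def should_forbid_pass(elo):
    # Below -1000 Elo always forbid passing; otherwise forbid in ~5% of games.
    return elo < -1000 or random.randint(1, 100) > 95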
Example 6
 def selftrain(self, test=True):
     config.running = True
     self.running = True
     # Background search thread that generates self-play data.
     thsearch = threading.Thread(target=self.thread_allsearch)
     thsearch.daemon = True
     thsearch.start()
     if not self.net:
         self.net = policy.PolicyNetwork(is_train=True, selftrain=True)
     n = self.thread_data()
     if test:
         thtest = threading.Thread(target=self.thread_test)
         thtest.daemon = True
         thtest.start()
     self.net.train(self.datas)
     self.datas.clear()
     self.datas.data_files = []
     self.net.train_n = n
     self.running = False
Example 7
 def thread_data(self):
     maxn = 0
     # Data files are named "train<N>"; find the highest index.
     fns = [f for f in os.listdir(self.datas.save_dir)
            if f.startswith("train") and f[5:].isdigit()]
     if not fns:
         return maxn
     maxn = max(int(f[5:]) for f in fns)
     if not self.net:
         self.net = policy.PolicyNetwork(is_train=True, selftrain=True)
     s = self.net.train_n
     # Number of data files to include in this training window.
     n = int(250000 * config.batch_size / 10000 / 16)
     if s + n > maxn:
         # Near the newest data: slide the window back so it still spans n files.
         s = max(0, maxn - n)
         e = maxn
     else:
         e = min(maxn, s + n)
     print("Highest file index:", maxn, "training starts at:", s, "ends at:", e)
     self.datas.data_files = []
     for i in range(s, e):
         filepath = os.path.join(self.datas.save_dir, "train{}".format(i))
         if os.path.isfile(filepath):
             self.datas.data_files.append(filepath)
             self.net.game_num += 80
     fnum = len(self.datas.data_files)
     if fnum <= 0:
         config.running = False
     print("Training directory:", self.datas.save_dir, "file count:", fnum)
     self.datas.start_load(del_file=False)
     return e + 1
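The window selection above advances train_n through the file indices and clamps to the newest data. A standalone sketch of the same logic with hypothetical numbers:

def file_window(s, n, maxn):
    # Mirrors the selection above: an n-file window clamped to the newest files.
    if s + n > maxn:
        return max(0, maxn - n), maxn
    return s, min(maxn, s + n)

print(file_window(0, 25, 100))   # (0, 25)  -- oldest files first
print(file_window(90, 25, 100))  # (75, 100) -- clamped to the last 25 files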
Example 8
import time
import go
import strategies
import policy
import sgf_wrapper
import load_data_sets
import utils

net = policy.PolicyNetwork()
net.initialize_variables('/Users/brilee/dev/MuGo/saved_models/20170718')
now = time.time()

positions = [
    go.Position(to_play=go.BLACK if i % 2 == 0 else go.WHITE) for i in range(2)
]
# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
strategies.simulate_many_games(net, net, positions)
print(time.time() - now)
now = time.time()


def get_winrate(final_positions):
    black_win = [
        utils.parse_game_result(pos.result()) == go.BLACK
        for pos in final_positions
    ]
    return sum(black_win) / len(black_win)


def extract_moves(final_positions):
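A hedged usage note for get_winrate, assuming simulate_many_games above fills in each position's result:

# Hypothetical follow-up to the run above:
print("black win rate:", get_winrate(positions))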
Example 9
import policy
import time
import torch
import memory
import random
import math
import torch.nn.functional as F
import nstep
import numpy as np
from graphics import *
import utils

STATE_SPACE_SIZE = 8
ACTION_SPACE_SIZE = 4

policy_network = policy.PolicyNetwork(STATE_SPACE_SIZE, ACTION_SPACE_SIZE)

goal_network = policy.GoalNetwork(STATE_SPACE_SIZE)

# Value network with a lagged (target) copy for stable bootstrapping.
value_network = policy.ValueNetwork(STATE_SPACE_SIZE)
lagged_value_network = policy.ValueNetwork(STATE_SPACE_SIZE)
lagged_value_network.copy_weights(value_network)

# A second value network with its own lagged copy.
value_network2 = policy.ValueNetwork(STATE_SPACE_SIZE)
lagged_value_network2 = policy.ValueNetwork(STATE_SPACE_SIZE)
lagged_value_network2.copy_weights(value_network2)

replay_memory_size = 100000
replay_memory = memory.ReplayMemory(replay_memory_size)

# export OMP_NUM_THREADS=1
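copy_weights above syncs a lagged (target) network with its live counterpart. A minimal sketch of what such a method might look like, assuming standard torch.nn.Module state dicts (the class here is a hypothetical stand-in; the real policy.ValueNetwork may differ):

import torch.nn as nn

class ValueNetworkSketch(nn.Module):
    # Hypothetical stand-in for policy.ValueNetwork, only to illustrate copy_weights.
    def __init__(self, state_space_size):
        super().__init__()
        self.body = nn.Sequential(
            nn.Linear(state_space_size, 64), nn.ReLU(), nn.Linear(64, 1))

    def forward(self, x):
        return self.body(x)

    def copy_weights(self, other):
        # Hard sync: overwrite this lagged copy with the live network's parameters.
        self.load_state_dict(other.state_dict())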