def __init__(
    self,
    side,
    network,
    debugging=True,
    n_playout=800,
    search_threads=16,
    virtual_loss=0.02,
    policy_loop_arg=True,
    c_puct=5,
    dnoise=False,
    temp_round=conf.train_temp_round,
    can_surrender=False,
    surrender_threshold=-0.99,
    allow_legacy=False,
    repeat_noise=True,
):
    """Create a player whose move search is an ``mcts_async.MCTS`` instance.

    The constructor only records configuration and builds helper objects.

    Args:
        side: Passed straight to the parent class constructor.
        network: Stored as ``self.network``.
        debugging: Stored as ``self.debugging``.
        n_playout, search_threads, virtual_loss, policy_loop_arg, c_puct,
            dnoise: Forwarded unchanged, by keyword, to ``mcts_async.MCTS``.
        temp_round: Stored as ``self.temp_round``; the default is read from
            ``conf.train_temp_round`` once, when the method is defined.
        can_surrender: Stored as ``self.can_surrender``.
        surrender_threshold: Stored as ``self.surrender_threshold``.
        allow_legacy: Stored as ``self.allow_legacy``.
        repeat_noise: Stored as ``self.repeat_noise``.
    """
    super(NetworkPlayer, self).__init__(side)

    # Plain configuration, kept on the instance for use by other methods.
    self.network = network
    self.debugging = debugging

    # Queue built with the argument 400 -- presumably its capacity;
    # confirm against the Queue class this file imports.
    self.queue = Queue(400)

    self.temp_round = temp_round
    self.can_surrender = can_surrender
    self.allow_legacy = allow_legacy
    self.surrender_threshold = surrender_threshold
    self.repeat_noise = repeat_noise

    # Search settings handed to the tree search exactly as received.
    search_settings = dict(
        n_playout=n_playout,
        search_threads=search_threads,
        virtual_loss=virtual_loss,
        policy_loop_arg=policy_loop_arg,
        c_puct=c_puct,
        dnoise=dnoise,
    )
    # The bound method policy_value_fn_queue is the first positional
    # argument of the search object.
    self.mcts_policy = mcts_async.MCTS(
        self.policy_value_fn_queue,
        **search_settings
    )
if move in legal_move_b: move = board.flipped_uci_labels([move])[0] action_probs.append((move,prob)) else: for move,prob in zip(uci_labels,policyout): if move in legal_move: action_probs.append((move,prob)) action_probs = sorted(action_probs,key=lambda x:x[1]) return action_probs, valout for one_play in range(10): states = [] moves = [] game_states = GameState() mcts_policy_w = mcts_async.MCTS(policy_value_fn_async_batch,n_playout=1600,search_threads=32,virtual_loss=0.03) mcts_policy_b = mcts_async.MCTS(policy_value_fn_async_batch,n_playout=1600,search_threads=32,virtual_loss=0.03) result = 'peace' for i in range(150): begin = time.time() is_end,winner = game_states.game_end() if is_end == True: result = winner break start = time.time() if i % 2 == 0: player = 'w' if i < 18: temp = 1 else:
with graph.as_default(): saver = tf.train.Saver(var_list=tf.global_variables()) saver.restore(sess, model_dir) print('param updated {}'.format(model_dir)) nm.updated(latest_model_name) print("current weight {}".format(nm.netname)) states = [] moves = [] game_states = GameState() mcts_policy_w = mcts_async.MCTS(policy_value_fn_queue, n_playout=800, search_threads=16, virtual_loss=0.02, policy_loop_arg=True, c_puct=5, dnoise=False) mcts_policy_b = mcts_async.MCTS(policy_value_fn_queue, n_playout=800, search_threads=16, virtual_loss=0.02, policy_loop_arg=True, c_puct=5, dnoise=False) result = 'peace' peace_round = 0 remain_piece = countpiece(game_states.statestr) can_surrender = random.random() > 0.1