def generate(config, model_file, output_file):
    """Play self-play games indefinitely, appending one record per finished game.

    A batch of ``config.batch_size`` games is advanced in lockstep: the model
    is evaluated on all boards at once, one move is sampled for every game,
    and each game that has a winner is turned into a training record via
    ``game_result`` and written to ``output_file``. The finished slot is then
    reset to a fresh board. The loop never terminates on its own.

    Args:
        config: Object providing ``size`` and ``batch_size`` (and whatever
            ``game_result`` reads from it).
        model_file: Passed to ``load_model``.
        output_file: Path of the record file; opened in binary append mode.
    """
    model = load_model(model_file)
    with open(output_file, 'ab') as fout:
        # Drop any partially written trailing record. The file is opened in
        # append mode, where writes land at end-of-file regardless of the
        # seek position, so seeking backwards alone would leave the torn
        # record in place; the file has to be shortened explicitly.
        file_pos = fout.tell()
        valid_length = file_pos - file_pos % record_size(config.size)
        if valid_length != file_pos:
            fout.truncate(valid_length)
            fout.seek(valid_length)

        samples = 0
        start_time = time.time()
        game_boards = numpy.array(
            [new_board(config.size) for _ in range(config.batch_size)])
        game_moves = [[] for _ in range(config.batch_size)]

        while True:
            _values, priors = model.predict(game_boards)
            priors = numpy.reshape(priors, (-1, config.size, config.size))
            for i in range(config.batch_size):
                probs = fix_probabilities(game_boards[i], priors[i])
                move = sample_move(probs)
                game_moves[i].append(move)
                game_boards[i] = make_move(game_boards[i], move)
                if not winner(game_boards[i]):
                    continue

                samples += 1
                board, won, visits = game_result(config, model, game_moves[i])
                write_record(fout, board, won, visits)
                # Flush per record so an interrupted run loses at most the
                # record currently being written.
                fout.flush()
                print_board(board, file=sys.stderr)
                print('Games: %d, Time per game: %.2fs' % (samples, (time.time() - start_time) / samples), file=sys.stderr)

                # Recycle this batch slot for a new game.
                game_boards[i] = new_board(config.size)
                game_moves[i] = []
def game_result(config, model, moves):
    """Build one training sample from a finished game's move list.

    A cut-off index is drawn uniformly from ``0 .. len(moves) - 1``; the moves
    before it are replayed on a fresh board, and a tree search is run from
    the resulting position for ``config.iterations`` iterations.

    Returns:
        A tuple ``(board, won, visits)``: the replayed position, a bool that
        is True when the cut-off index and the index of the game's last move
        have the same parity, and the value of ``predictor.visits()`` after
        the search.
    """
    final_index = len(moves) - 1
    cutoff = random.randint(0, final_index)

    position = new_board(config.size)
    for played in moves[:cutoff]:
        position = make_move(position, played)

    # The fourth constructor argument is True only when no move was replayed.
    at_start = cutoff == 0
    search = TreeSearchPredictor(config.search_config, model, position, at_start)
    search.run(config.iterations)

    same_parity = (final_index - cutoff) % 2 == 0
    return position, same_parity, search.visits()
def compare(config, model1, model2, t, T, num_games):
    """Play ``num_games`` games between two models and track a running win ratio.

    Each game creates two tree-search predictors, one per model, and feeds
    every chosen move to both so their boards stay in sync. ``model1``'s
    predictor is constructed with the extra ``t`` and ``T`` arguments.

    Move selection:
        * Move 0 is always chosen by ``model2``'s predictor.
        * Every later move is chosen by ``predictors[(games ^ move_index) & 1]``,
          so the two models swap sides from one game to the next.
        * ``temperature(probabilities, T)`` is applied whenever the parity of
          the game counter equals the parity of the move index.

    Args:
        config: Object providing ``search_config``, ``size`` and ``iterations``.
        model1: Model used by the first predictor.
        model2: Model used by the second predictor.
        t: Forwarded to the first predictor's constructor.
        T: Forwarded to the first predictor's constructor and used as the
            temperature argument.
        num_games: Number of games to play.

    Returns:
        A list with one float per game: after each game, the fraction of
        games so far that were counted as wins. Empty when ``num_games`` is 0.
    """
    games = 0
    first_player_wins = 0
    ratios = []

    for _ in range(num_games):
        move_index = 0
        predictors = [
            TreeSearchPredictor(config.search_config, model1, new_board(config.size), True, t, T),
            TreeSearchPredictor(config.search_config, model2, new_board(config.size), True),
        ]

        while not winner(predictors[0].board):
            if move_index == 0:
                mover = predictors[1]
            else:
                mover = predictors[(games ^ move_index) & 1]
            mover.run(config.iterations)
            _value, probabilities = mover.predict()
            if games & 1 == move_index & 1:
                probabilities = temperature(probabilities, T)
            move = refined_move(probabilities)
            # Keep both search trees on the same position.
            for predictor in predictors:
                predictor.make_move(move)
            move_index += 1

        games += 1
        # With `games` already incremented, equal parity here means the last
        # move (index move_index - 1 >= 1) was selected by predictors[0].
        if games & 1 == move_index & 1:
            first_player_wins += 1
        ratios.append(float(first_player_wins) / games)

    return ratios
def read_record(f, size):
    """Read one fixed-size game record from ``f`` and decode it.

    ``record_size(size)`` bytes are read. The record holds ``size * size``
    cells, each unpacked with ``short``, followed by one value unpacked with
    ``boolean``. Within a cell, the low six bits are the visit count, bit 6
    is plane 0 of the board and bit 7 is plane 1.

    Returns:
        A tuple ``(board, won, visits)`` where ``board`` comes from
        ``new_board(size)`` with planes 0 and 1 filled in, ``won`` is the
        trailing flag, and ``visits`` is a ``(size, size)`` numpy array.
    """
    raw = f.read(record_size(size))
    board = new_board(size)
    visits = numpy.zeros((size, size))

    visit_mask = (1 << 6) - 1
    cell_width = short.size

    for row in range(size):
        row_start = row * size * cell_width
        for col in range(size):
            cell = short.unpack_from(raw, row_start + col * cell_width)[0]
            board[0, row, col] = (cell >> 6) & 1
            board[1, row, col] = (cell >> 7) & 1
            visits[row, col] = cell & visit_mask

    # The result flag sits immediately after the last cell.
    won = boolean.unpack_from(raw, size * size * cell_width)[0]
    return board, won, visits
def clear_board(self):
    """Reset the game state: a new empty board and an empty move history.

    Returns:
        The empty string.
    """
    fresh_board = new_board(self.config.size)
    self.board = fresh_board
    self.history = list()
    return ''
def compare(config, num_games, temp, Temp, name):
    """Play ``num_games`` games of the tree-search agent against ``wolve``.

    The agent always moves first (as "black"); ``wolve`` answers as "white".
    Progress and results are printed to stdout.

    Relies on module-level names that are not parameters: ``model``,
    ``wolve``, ``num_to_letter`` and ``letter_to_num``.

    Args:
        config: Object providing ``search_config``, ``size`` and ``iterations``.
        num_games: Number of games to play.
        temp: Forwarded to ``TreeSearchPredictor``.
        Temp: Forwarded to ``TreeSearchPredictor``.
        name: Label for the agent in printed output.

    Returns:
        The fraction of games won by the agent, or 0.0 when ``num_games`` is
        not positive (previously a ``ZeroDivisionError`` for 0).
    """
    print('\nplaying with', name, 'with vals:', temp, Temp)
    alpha_wins = 0
    wolve_wins = 0

    for i in range(num_games):
        alpha_agent = TreeSearchPredictor(config.search_config, model, new_board(config.size), True, temp, Temp)
        # Make sure wolve starts from an empty board as well.
        wolve.clear_board()
        print('its game number: ', i + 1)

        while not winner(alpha_agent.board):
            # Agent's turn.
            alpha_agent.run(config.iterations)
            _value, probabilities = alpha_agent.predict()
            probabilities = fix_probabilities(alpha_agent.board, probabilities)
            alpha_move = best_move(probabilities)
            alpha_agent.make_move(alpha_move)

            # Mirror the move on wolve's board: first coordinate becomes the
            # letter, second becomes the 1-based number.
            letter, number = alpha_move
            alpha_move = str(num_to_letter[letter]) + str(number + 1)
            wolve.insert_move("black", alpha_move)

            if winner(alpha_agent.board):
                print("alpha wins!!!")
                alpha_wins += 1
                continue

            # Wolve's turn.
            wolve_move = wolve.genmove("white")
            letter = letter_to_num[wolve_move[0]]
            number = int(wolve_move[1:]) - 1
            # NOTE(review): the tuple order here is (number, letter), the
            # reverse of how the agent's own move is unpacked above.
            # Presumably the agent's board is viewed from the other side
            # after each move -- confirm against make_move.
            wolve_move = (number, letter)
            alpha_agent.make_move(wolve_move)

            if winner(alpha_agent.board):
                print("wolve wins!!!")
                wolve_wins += 1
                continue

        print(name, 'won', alpha_wins, 'out of', i + 1)
        print('wolve won', wolve_wins, 'out of', i + 1)

    print(name, 'won', alpha_wins, 'times out of', num_games, 'games')
    print('wolve won', wolve_wins, 'times out of', num_games, 'games')

    # No games played means no meaningful ratio; avoid dividing by zero.
    if num_games <= 0:
        return 0.0
    return alpha_wins / num_games
from agent1_zero_dnn.tree_search import TreeSearchPredictor,temperature from convertor.Convertor_ver4 import convert from convertor.Convertor_ver4 import convert_last_moves from agent1_zero_dnn.game import refined_moves,best_k_moves,new_board from agent1_zero_dnn.generate import fix_probabilities import sys import numpy as np import os from agent1_zero_dnn.config import CompareConfig import math model_name="model" config = CompareConfig() model=load_model(model_name) predictor = TreeSearchPredictor(config.search_config, model, new_board(config.size), True) def adam(params,grads,lr,vs,sqrs,i): beta1 = 0.9 beta2 = 0.999 eps_stable = 1e-8 ret_params = [] for param,grad,v,sqr in zip(params,grads,vs,sqrs): g = grad v = beta1 * v + (1. - beta1) * g sqr = beta2 * sqr + (1. - beta2) * np.square(g) v_bias_corr = v / (1. - beta1 ** i)