Example #1
0
def generate(config, model_file, output_file):
    """Play self-play games in an endless loop, appending one record per game.

    ``config.batch_size`` games are advanced in lockstep: each iteration asks
    the model for priors on every current board, samples one move per game
    and applies it.  Whenever a game has a winner, ``game_result`` is called
    on its move list, the result is written to ``output_file`` with
    ``write_record``, and that slot is restarted from a fresh board.

    Args:
        config: Object providing ``size`` and ``batch_size`` (and whatever
            ``game_result`` reads from it).
        model_file: Passed to ``load_model``.
        output_file: Path of the record file; opened in binary append mode.

    This function contains no exit path: it runs until interrupted or until
    an exception is raised.
    """
    model = load_model(model_file)
    with open(output_file, 'ab') as fout:
        # Drop any partially written trailing record.  The file is opened in
        # append mode, where writes land at the end of the file no matter
        # where seek() points, so the incomplete tail has to be removed with
        # truncate(); seek() alone would leave it in place and misalign every
        # record written afterwards.
        record_length = record_size(config.size)
        file_pos = fout.tell()
        valid_length = file_pos - file_pos % record_length
        fout.truncate(valid_length)
        fout.seek(valid_length)

        samples = 0
        start_time = time.time()
        game_boards = numpy.array([new_board(config.size) for _ in range(config.batch_size)])
        game_moves = [[] for _ in range(config.batch_size)]
        while True:
            _values, priors = model.predict(game_boards)
            priors = numpy.reshape(priors, (-1, config.size, config.size))
            for i in range(config.batch_size):
                probs = fix_probabilities(game_boards[i], priors[i])
                move = sample_move(probs)
                game_moves[i].append(move)
                game_boards[i] = make_move(game_boards[i], move)
                if winner(game_boards[i]):
                    samples += 1
                    board, won, visits = game_result(config, model, game_moves[i])
                    write_record(fout, board, won, visits)
                    # Flush after every record so an interruption loses at
                    # most the record currently being written.
                    fout.flush()
                    print_board(board, file=sys.stderr)
                    print('Games: %d, Time per game: %.2fs' % (samples, (time.time() - start_time) / samples), file=sys.stderr)
                    # Restart this slot with a new game.
                    game_boards[i] = new_board(config.size)
                    game_moves[i] = []
Example #2
0
def game_result(config, model, moves):
    """Build a training sample from a random position of a finished game.

    A cut index is drawn uniformly from ``0 .. len(moves) - 1``; the moves
    before that index are replayed on a fresh board and a tree search is run
    from the resulting position.

    Args:
        config: Object providing ``size``, ``search_config`` and ``iterations``.
        model: Passed to ``TreeSearchPredictor``.
        moves: Sequence of the moves played in the game.

    Returns:
        tuple: ``(board, won, visits)`` where ``board`` is the position at
        the cut, ``won`` is True when the cut index and the index of the
        last move have the same parity, and ``visits`` is whatever
        ``predictor.visits()`` returns after the search.
    """
    final_index = len(moves) - 1
    cut = random.randint(0, final_index)

    position = new_board(config.size)
    for played in moves[:cut]:
        position = make_move(position, played)

    # The last constructor argument is True only when no move has been replayed.
    search = TreeSearchPredictor(config.search_config, model, position, cut == 0)
    search.run(config.iterations)

    same_parity = (final_index - cut) % 2 == 0
    return position, same_parity, search.visits()
Example #3
0
def compare(config, model1, model2, t, T, num_games):
    """Play ``num_games`` games between two tree-search predictors.

    For every game a predictor is built for ``model1`` (with the extra
    ``t`` and ``T`` arguments) and one for ``model2``.  Each chosen move is
    applied to both predictors, and a game ends once ``winner`` reports a
    result on the first predictor's board.

    Args:
        config: Object providing ``search_config``, ``size`` and ``iterations``.
        model1: Model for the first predictor.
        model2: Model for the second predictor.
        t: Forwarded to the first predictor's constructor.
        T: Forwarded to the first predictor's constructor and used as the
            argument of ``temperature``.
        num_games: Number of games to play.

    Returns:
        list of float: after each game, ``first_player_wins / games`` so far.
        A game is counted in ``first_player_wins`` when the number of games
        played so far and the number of moves in that game have the same
        parity.  The list is empty when ``num_games`` is 0.
    """
    games = 0
    first_player_wins = 0
    ratios = []

    for _ in range(num_games):
        move_index = 0
        predictors = [
            TreeSearchPredictor(config.search_config, model1,
                                new_board(config.size), True, t, T),
            TreeSearchPredictor(config.search_config, model2,
                                new_board(config.size), True),
        ]

        while not winner(predictors[0].board):
            # The opening move always comes from the second predictor; after
            # that the mover is selected by the parity of games ^ move_index.
            if move_index == 0:
                mover = predictors[1]
            else:
                mover = predictors[(games ^ move_index) & 1]
            mover.run(config.iterations)
            _value, probabilities = mover.predict()

            # Temperature is applied only on moves whose index has the same
            # parity as the number of games completed so far.
            if games & 1 == move_index & 1:
                probabilities = temperature(probabilities, T)

            move = refined_move(probabilities)

            # Apply the move to every predictor, not just the one that chose it.
            for predictor in predictors:
                predictor.make_move(move)

            move_index += 1

        games += 1
        if games & 1 == move_index & 1:
            first_player_wins += 1
        ratios.append(float(first_player_wins) / games)
    return ratios
Example #4
0
def read_record(f, size):
    """Read one record from ``f`` and decode it.

    The record holds ``size * size`` cells unpacked with ``short`` in
    row-major ``(x, y)`` order, followed by one value unpacked with
    ``boolean``.  In each cell, bits 0-5 are the visit count, bit 6 goes to
    plane 0 of the board and bit 7 to plane 1.

    Args:
        f: Binary file-like object positioned at the start of a record.
        size: Board side length.

    Returns:
        tuple: ``(board, won, visits)`` where ``board`` comes from
        ``new_board(size)`` with planes 0 and 1 filled in, ``won`` is the
        decoded trailing value and ``visits`` is a ``(size, size)`` array.
    """
    raw = f.read(record_size(size))
    board = new_board(size)
    visits = numpy.zeros((size, size))

    visit_mask = (1 << 6) - 1
    cell_width = short.size
    for index in range(size * size):
        x, y = divmod(index, size)
        cell = short.unpack_from(raw, index * cell_width)[0]
        visits[x, y] = cell & visit_mask
        board[0, x, y] = (cell >> 6) & 1
        board[1, x, y] = (cell >> 7) & 1

    # The result flag sits immediately after the last cell.
    won = boolean.unpack_from(raw, size * size * cell_width)[0]
    return board, won, visits
Example #5
0
 def clear_board(self):
     """Replace ``self.board`` with a fresh board and empty ``self.history``.

     Returns:
         str: always the empty string.
     """
     board_size = self.config.size
     self.board = new_board(board_size)
     self.history = []
     return ''
Example #6
0
def compare(config, num_games, temp, Temp, name):
    """Play ``num_games`` games of the tree-search agent against ``wolve``.

    The agent is built from the module-level ``model`` and always moves
    first, as "black"; ``wolve`` (also module-level) answers as "white".
    Progress and results are printed to standard output.

    Args:
        config: Object providing ``search_config``, ``size`` and ``iterations``.
        num_games: Number of games to play.
        temp: Forwarded to the ``TreeSearchPredictor`` constructor.
        Temp: Forwarded to the ``TreeSearchPredictor`` constructor.
        name: Label used for the agent in the printed output.

    Returns:
        float: ``alpha_wins / num_games``.

    Raises:
        ZeroDivisionError: if ``num_games`` is 0.
    """
    print('\nplaying with', name, 'with vals:', temp, Temp)
    alpha_wins = 0
    wolve_wins = 0

    for i in range(num_games):
        alpha_agent = TreeSearchPredictor(config.search_config, model,
                                          new_board(config.size), True, temp,
                                          Temp)

        # Make sure wolve starts from a clear board.
        wolve.clear_board()
        print('its game number: ', i + 1)

        while not winner(alpha_agent.board):
            # Alpha's turn: search, pick the best legal move and play it.
            alpha_agent.run(config.iterations)
            _value, probabilities = alpha_agent.predict()
            probabilities = fix_probabilities(alpha_agent.board, probabilities)
            alpha_move = best_move(probabilities)
            alpha_agent.make_move(alpha_move)

            # Forward the move to wolve as text: letter, then 1-based number.
            letter, number = alpha_move
            alpha_move = str(num_to_letter[letter]) + str(number + 1)
            wolve.insert_move("black", alpha_move)
            if winner(alpha_agent.board):
                print("alpha wins!!!")
                alpha_wins += 1
                break

            # Wolve's turn: parse its text move back into board coordinates.
            wolve_move = wolve.genmove("white")
            letter = letter_to_num[wolve_move[0]]
            number = int(wolve_move[1:]) - 1
            # NOTE(review): alpha's move is unpacked as (letter, number) but
            # wolve's move is handed over as (number, letter) - confirm this
            # swap is the intended coordinate convention.
            wolve_move = (number, letter)
            alpha_agent.make_move(wolve_move)
            if winner(alpha_agent.board):
                print("wolve wins!!!")
                wolve_wins += 1
                break

        print(name, 'won', alpha_wins, 'out of', i + 1)
        print('wolve won', wolve_wins, 'out of', i + 1)

    print(name, 'won', alpha_wins, 'times out of', num_games, 'games')
    print('wolve won', wolve_wins, 'times out of', num_games, 'games')
    return alpha_wins / num_games
Example #7
0
from agent1_zero_dnn.tree_search import TreeSearchPredictor,temperature
from convertor.Convertor_ver4 import convert
from convertor.Convertor_ver4 import convert_last_moves
from agent1_zero_dnn.game import refined_moves,best_k_moves,new_board
from agent1_zero_dnn.generate import fix_probabilities
import sys
import numpy as np
import os
from agent1_zero_dnn.config import CompareConfig
import math


# Module-level setup: the statements below run when this module is imported.
model_name="model"
config = CompareConfig()
# NOTE(review): `load_model` is not among the imports visible at the top of
# this file - confirm where it comes from, otherwise this line raises NameError.
model=load_model(model_name)
# Module-level predictor built from the loaded model on a fresh board.
predictor = TreeSearchPredictor(config.search_config, model, new_board(config.size), True)


def adam(params,grads,lr,vs,sqrs,i):
    beta1 = 0.9
    beta2 = 0.999
    eps_stable = 1e-8
    ret_params = []

    for param,grad,v,sqr in zip(params,grads,vs,sqrs):
        g = grad

        v = beta1 * v + (1. - beta1) * g
        sqr = beta2 * sqr + (1. - beta2) * np.square(g)

        v_bias_corr = v / (1. - beta1 ** i)