Beispiel #1
0
def _build_training_arrays(game_record, indices):
    """Build the network input/target arrays for the selected positions.

    ``game_record[0][i]`` is passed to ``features``; ``game_record[1][i]``
    and ``game_record[2][i]`` are copied into the policy and result targets.
    Returns ``(feats, pis, results)`` with one row per entry of ``indices``.
    """
    feats = np.zeros((len(indices), 14, 8, 8))
    pis = np.zeros((len(indices), 1968))
    results = np.zeros((len(indices), 1))

    for row, index in enumerate(indices):
        feats[row] = features(game_record[0][index])
        pis[row] = game_record[1][index]
        results[row] = game_record[2][index]

    return feats, pis, results


def train():
    """Fit the saved training model on each GMdata pickle file in turn.

    Files 1..286 are visited in shuffled order. For every file, only the
    first split produced by ``KFold`` is used: the second index array of
    that split selects the positions to train on. The model is reloaded
    from disk before each fit and saved back afterwards.
    """
    training_set = 1
    files = list(range(1, 287))
    shuffle(files)
    for file_num in files:
        print('Training set:', training_set)
        kf = KFold(n_splits=15, shuffle=True)
        early = EarlyStopping(monitor='val_activation_37_loss',
                              min_delta=0.001,
                              patience=10,
                              verbose=1)
        # Raw strings keep the Windows-style paths without relying on
        # invalid escape sequences such as '\T' or '\D'.
        tensorboard = TensorBoard(log_dir=r'.\TensorBoard')

        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only use this with locally generated, trusted data files.
        filename = r'.\Data\GMdata' + str(file_num) + '.pickle'
        with open(filename, 'rb') as file_object:
            game_record = pickle.load(file_object)

        # Only the first split is consumed (note the break below); its
        # second index array picks the subset of positions to train on.
        for _, train_idx in kf.split(game_record[0]):
            feats, pis, results = _build_training_arrays(game_record,
                                                         train_idx)

            ktf.set_session(get_session())
            model = load_model(filepath=r'.\models\model_train.h5')
            model.fit(feats, [pis, results],
                      batch_size=200,
                      epochs=300,
                      verbose=2,
                      callbacks=[early, tensorboard],
                      validation_split=0.3)
            model.save(filepath=r'.\models\model_train.h5')

            # Drop the model before the next file to keep peak memory down.
            del model

            break

        del game_record
        print('Training epoch:', training_set, 'completed. Training set:',
              file_num, '\n')
        training_set += 1
        gc.collect()
        time.sleep(15)

    print('Training complete.')
    '''
Beispiel #2
0
def iteration(tree, board, C, poss_moves, pipe_sim, **kwargs):
    """Run one search iteration from ``tree`` and back up the last value.

    Starting from a copy of ``board``, moves are selected and pushed until
    the game is over or ``search_depth`` plies (keyword argument, default
    50) have been played. After each move the reached position is sent
    through ``pipe_sim`` for evaluation and the node's children are updated
    with the returned priors. The value from the final evaluation is then
    backed up along the visited path.

    Args:
        tree: Root node of the search; children are reached via ``.nodes``.
        board: Position at the root; it is deep-copied, not mutated.
        C: Exploration constant forwarded to ``get_move``.
        poss_moves: List of all UCI move strings; used to map each legal
            move to its index in the prior vector.
        pipe_sim: Connection used to send features and receive
            ``(priors, value)``.
        **kwargs: ``search_depth`` limits the number of plies searched.

    Returns:
        The same ``tree`` object that was passed in.
    """
    node = tree
    search_depth = kwargs.get('search_depth', 50)  # 25 fullmoves
    state = deepcopy(board)
    game_over = False
    path = [node]   # nodes visited, root first
    chosen = []     # child index taken at each node in ``path[:-1]``

    # Traverse tree until end of game or search depth is reached
    while not game_over and len(chosen) < search_depth:
        # Children are indexed by position in the sorted UCI move list
        legal = sorted(move.uci() for move in state.generate_legal_moves())

        if len(legal) == 1:
            # Forced move: no need to score the edges
            index = 0
        else:
            # Select move using PUCT equation. See: tools.get_move()
            index = get_move(node_edges(node, legal), C)
        state.push(chess.Move.from_uci(legal[index]))

        # Descend into the selected child
        node = node.nodes[index]
        path.append(node)

        # Evaluate and expand the position that was just reached. The
        # features must come from ``state``; using ``board`` would evaluate
        # the root position for every node and mismatch the legal moves.
        legal = sorted(move.uci() for move in state.generate_legal_moves())
        indices = [poss_moves.index(move) for move in legal]
        feats = features(state.fen())
        feats = feats.reshape(1, 14, 8, 8)
        pipe_sim.send(feats)
        pipe_sim.poll(timeout=None)
        priors, value = pipe_sim.recv()

        for position, uci in enumerate(legal):
            nodes_update(node, priors, indices, position, uci)

        # Bookkeeping
        game_over = state.is_game_over()
        chosen.append(index)

    # Backup: walk from the deepest parent back to the root, pairing each
    # parent with the child index that was taken from it.
    for parent, index in zip(reversed(path[:-1]), reversed(chosen)):
        nodes_backup(parent, index, value)

    return tree
Beispiel #3
0
def main():
    """Train the saved model on random samples of recorded self-play games.

    Loads the self-play record pickle once, then for 100 rounds draws 3000
    random positions (with replacement), converts them to the three feature
    arrays returned by ``features`` and fits the model, reloading it from
    and saving it back to the training checkpoint each round.
    """
    from keras.models import load_model
    from keras.callbacks import EarlyStopping

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this with locally generated, trusted record files.
    file_Name = ".\\records\\brownie24_self_play_records.pickle"
    with open(file_Name, 'rb') as fileObject:
        game_records = pickle.load(fileObject)

    early = EarlyStopping(patience=20, verbose=0)
    sample_size = 3000
    for training_epoch in range(100):
        print('Training epoch:', training_epoch, '\n')

        ws = np.zeros((sample_size, 8, 8, 1))
        bs = np.zeros((sample_size, 8, 8, 1))
        ps = np.zeros((sample_size, 1))
        pis = np.zeros((sample_size, 1968))
        results = np.zeros((sample_size, 1))
        sample_indices = np.random.randint(0,
                                           high=len(game_records[0]),
                                           size=sample_size)
        for sample, record_index in enumerate(sample_indices):
            w, b, p = features(game_records[0][record_index])
            ws[sample] = w
            bs[sample] = b
            ps[sample] = p
            pis[sample] = game_records[1][record_index]
            results[sample] = game_records[2][record_index]

        # Raw string avoids the invalid '\m' escape in the Windows path.
        training_model = load_model(filepath=r'.\models\model_train.h5')
        training_model.fit([ws, bs, ps], [pis, results],
                           batch_size=200,
                           epochs=300,
                           verbose=2,
                           callbacks=[early],
                           validation_split=0.3)
        training_model.save(filepath=r'.\models\model_train.h5')
        del training_model
Beispiel #4
0
def mcts(board, poss_moves, pipes_sim, **kwargs):
    """Search the position with parallel simulations and pick a move.

    The root position is evaluated through ``pipes_sim[0]``, Dirichlet
    noise is mixed into the priors, and one ``parallel_simulation`` process
    is started per pipe. The trees returned by the workers are merged into
    the master tree child by child, and a move is sampled from the
    visit-count distribution.

    Args:
        board: Current position; it is deep-copied, not mutated.
        poss_moves: List of all UCI move strings; maps a move to its index
            in the prior/policy vector.
        pipes_sim: Connections to the network process, one per worker.
        **kwargs: ``C`` (exploration constant, default 1.4),
            ``thinking_time`` (default 10), ``T`` (temperature, default
            0.0001) and ``tree`` (search tree to reuse).

    Returns:
        ``(move, pi, tree, index, q)``: the chosen UCI move, the policy
        vector, the chosen child's subtree, the move's index in the sorted
        legal move list, and ``tree.data[2]`` of the chosen child.
    """
    start = time.time()
    C = kwargs.get('C', 1.4)
    thinking_time = kwargs.get('thinking_time', 10)
    T = kwargs.get('T', 0.0001)
    # Only build a fresh tree when the caller did not supply one.
    tree = kwargs['tree'] if 'tree' in kwargs else SearchTree()
    state = deepcopy(board)
    # Children are indexed by position in the sorted UCI move list.
    legal = sorted([move.uci() for move in board.generate_legal_moves()])

    feats = features(board.fen())
    feats = feats.reshape(1, 14, 8, 8)
    pipes_sim[0].send(feats)
    pipes_sim[0].poll(timeout=None)
    priors, value = pipes_sim[0].recv()

    # Add Dirichlet noise to priors in root node
    noise = np.ravel(
        np.random.dirichlet([0.03, 0.03], size=len(priors)).reshape(1, -1))
    noise = noise[:len(priors)]
    epsilon = 0.25
    priors = ((1 - epsilon) * priors) + (epsilon * noise)

    # Create node for each legal move, ignoring if node already exists for
    # move. A flag is required here: a bare ``continue`` inside the inner
    # loop would not skip the creation and would append a duplicate child.
    indices = [poss_moves.index(move) for move in legal]
    for position, san in enumerate(legal):
        already_present = False
        for child_node in tree.nodes:
            if child_node.name == san:
                # Add Dirichlet noise to prior for node that already exists
                # for move
                prior = child_node.data[3]
                child_noise = np.random.dirichlet([0.03, 0.03])
                child_node.data[3] = ((1 - epsilon) * prior) + \
                                     (epsilon * child_noise[0])
                already_present = True

        if not already_present:
            tree.create_node(P=priors[indices[position]], name=san)

    # While elapsed time < thinking time, search tree:
    tree_queue = Queue()
    sim_queue = Queue()
    sims = []
    for pipe_sim in pipes_sim:
        sim = Process(target=parallel_simulation,
                      args=(tree, state, C, poss_moves, pipe_sim, start,
                            thinking_time, tree_queue, sim_queue))
        sims.append(sim)
        sim.start()

    # Collect one tree and one simulation count per worker; Queue.get()
    # blocks until the result is available.
    trees = []
    simulations = []
    for _ in sims:
        trees.append(tree_queue.get())
        simulations.append(sim_queue.get())

    print('Simulations:', sum(simulations), '| Thinking time:',
          time.time() - start, 'seconds.')

    # Update master tree nodes based on best Q-values of simulated tree nodes
    for child in range(len(tree.nodes)):
        Q_values = np.zeros(len(trees))
        for worker, worker_tree in enumerate(trees):
            Q_values[worker] = worker_tree.nodes[child].data[2]
        # Ties between workers are broken at random.
        best = np.random.choice(np.where(Q_values == Q_values.max())[0])
        tree.nodes[child] = trees[best].nodes[child]

    # Select move
    visits = [tree.nodes[position].data[0] for position in range(len(legal))]
    if sum(visits) > 0:
        probs = get_pi(visits, T)
        pi = np.zeros(priors.shape)
        for index, probability in zip(indices, probs):
            pi[index] = probability
        move = np.random.choice(legal, p=probs)
    else:
        # No visits recorded: fall back to the (noisy) priors and a uniform
        # random legal move.
        pi = priors
        move = np.random.choice(legal)

    pi_move = pi[poss_moves.index(move)]
    index = legal.index(move)

    print('MSE:', mse(pi, priors))
    # Prune tree for reuse in future searches
    tree = tree.nodes[index]

    # If the chosen child's data[2] is still 0, seed its children for the
    # new position so the subtree can be reused by the next search.
    if tree.data[2] == 0:
        state.push(chess.Move.from_uci(move))

        # Evaluate the position after the move (``state``), not ``board``,
        # so the priors match the legal moves expanded below.
        feats = features(state.fen())
        feats = feats.reshape(1, 14, 8, 8)
        pipes_sim[0].send(feats)
        pipes_sim[0].poll(timeout=None)
        priors, value = pipes_sim[0].recv()

        # Sorted, like everywhere else children are created or indexed.
        legal = sorted(
            [move_.uci() for move_ in state.generate_legal_moves()])
        indices = [poss_moves.index(move_) for move_ in legal]
        for position, san in enumerate(legal):
            nodes_update(tree, priors, indices, position, san)

    print('N:', tree.data[0], '| P:', tree.data[3], '| Pi:', pi_move)
    return move, pi, tree, index, tree.data[2]
def evaluation():
    """Play one game between the training model and the live model.

    The training ("Evaluator") network is randomly assigned White or Black.
    Each side gets its own network daemon process and its own search tree.
    The game ends on checkmate, on any other game-over condition (draw), or
    after move 100 (draw by length).

    Returns:
        1 if the training network won, 0.5 for a draw, 0 if it lost.
    """
    train_color = randint(0, 1)
    # Raw strings keep the Windows-style paths without the invalid '\m'
    # escape sequence.
    if train_color == 0:
        model1path = r'.\models\model_train.h5'
        model2path = r'.\models\model_live.h5'
        print('Evaluation network plays as White. Current generator network '
              'plays as Black.')
        player_1 = 'Evaluator'
        player_2 = 'Generator'
    else:
        model1path = r'.\models\model_live.h5'
        model2path = r'.\models\model_train.h5'
        print('Current generator network plays as White. Evaluation network '
              'plays as Black.')
        player_1 = 'Generator'
        player_2 = 'Evaluator'

    # Initialize neural network daemon for both players. At least one
    # worker is always created: with two or fewer cores the original count
    # would be zero and the pipe lists would be empty.
    pipes_net1 = []
    pipes_sim1 = []
    pipes_net2 = []
    pipes_sim2 = []
    for _ in range(max(1, cpu_count() - 2)):
        # Player 1 pipes
        p1, p2 = Pipe()
        pipes_net1.append(p1)
        pipes_sim1.append(p2)
        # Player 2 pipes
        p3, p4 = Pipe()
        pipes_net2.append(p3)
        pipes_sim2.append(p4)

    nn_p1 = Process(target=nn_daemon, args=(model1path, pipes_net1))
    nn_p2 = Process(target=nn_daemon, args=(model2path, pipes_net2))
    nn_p1.daemon = True
    nn_p2.daemon = True
    nn_p1.start()
    nn_p2.start()

    try:
        # Initialize board and game variables
        poss_moves = all_possible_moves()
        board = chess.Bitboard()
        p1tree = SearchTree()
        p2tree = SearchTree()
        move = 1
        T = 0.1  # Temperature coefficient is low for entire evaluation

        # Start daemon: send one position to each network and wait for the
        # reply (recv() blocks), discarding the result.
        feats = features(board.fen())
        feats = feats.reshape(1, 14, 8, 8)
        pipes_sim1[0].send(feats)
        pipes_sim2[0].send(feats)
        pipes_sim1[0].recv()
        pipes_sim2[0].recv()

        # Play game
        print('Game start.')
        while True:
            # Player 1 move
            print(player_1, 'is thinking...')
            p1move, _pi, p1tree, index, Q = mcts(board,
                                                 poss_moves,
                                                 pipes_sim1,
                                                 T=T,
                                                 tree=p1tree)
            board.push(chess.Move.from_uci(p1move))
            print(board)
            print(player_1, ': ', move, '. ', p1move, ' | Q: ', Q, '\n',
                  sep='')

            # Game ending conditions
            if board.is_game_over():
                if board.is_checkmate():
                    winner = 0
                    print('Winner:', player_1)
                else:
                    winner = -1
                    print('Game drawn.')
                break

            # Update Player 2 decision tree with Player 1's move. On the
            # first move Player 2 has not searched yet, so there is no
            # child to descend into.
            if move != 1:
                p2tree = p2tree.nodes[index]

            # Player 2 move
            print(player_2, 'is thinking...')
            p2move, _pi, p2tree, index, Q = mcts(board,
                                                 poss_moves,
                                                 pipes_sim2,
                                                 T=T,
                                                 tree=p2tree)
            board.push(chess.Move.from_uci(p2move))
            print(board)
            print(player_2, ': ', move, '... ', p2move, ' | Q: ', Q, '\n',
                  sep='')

            if board.is_game_over():
                if board.is_checkmate():
                    winner = 1
                    print('Winner:', player_2)
                else:
                    winner = -1
                    print('Game drawn.')
                break

            # Check if game is over by length
            if move == 100:
                winner = -1
                print('Game drawn by length.')
                break

            # Update Player 1 decision tree with Player 2's move
            p1tree = p1tree.nodes[index]

            move += 1
    finally:
        # Kill neural network daemons, even if the game loop raised
        nn_p1.terminate()
        nn_p2.terminate()

    # Determine trained network's performance. ``winner`` is 0 for
    # Player 1 (White), 1 for Player 2 (Black), matching ``train_color``.
    if winner == -1:
        return 0.5
    elif winner == train_color:
        return 1
    else:
        return 0
def self_play():
    """Play one self-play game with the live model and record it.

    Both sides use the same network daemon but keep separate search trees.
    The temperature is 1 for the first 10 moves and 0.1 afterwards. The
    game ends on checkmate, on any other game-over condition (draw), or
    after move 100 (draw by length).

    Returns:
        ``(game_record, winner)`` where ``game_record`` is
        ``[fens, policies, rewards]`` with one entry per move played (the
        FEN is the position before the move), and ``winner`` is 0 for
        White, 1 for Black, -1 for a draw.
    """
    game_start = time.time()

    # Initialize neural network daemon. At least one worker is always
    # created: on a single-core machine the original count would be zero
    # and the pipe lists would be empty.
    modelpath = r'.\models\model_live.h5'
    pipes_net = []
    pipes_sim = []
    for _ in range(max(1, cpu_count() - 1)):
        p1, p2 = Pipe()
        pipes_net.append(p1)
        pipes_sim.append(p2)
    nn_p = Process(target=nn_daemon, args=(modelpath, pipes_net))
    nn_p.daemon = True
    nn_p.start()

    try:
        # Initialize board and begin recording features for each board state
        poss_moves = all_possible_moves()
        board = chess.Bitboard()
        game_record = [[board.fen()], [], []]
        p1tree = SearchTree()
        p2tree = SearchTree()
        move = 1
        T = 1

        # Start daemon: send one position and wait for the reply (recv()
        # blocks), discarding the result.
        feats = features(board.fen())
        feats = feats.reshape(1, 14, 8, 8)
        pipes_sim[0].send(feats)
        pipes_sim[0].recv()

        # Play game and record board state features for each move
        print('Game start.')
        while True:
            # Determine temperature coefficient by game length
            if move > 10:
                T = 0.1

            # Player 1 move
            print('Player 1 is thinking...')
            p1move, pi, p1tree, index, Q = mcts(board,
                                                poss_moves,
                                                pipes_sim,
                                                T=T,
                                                tree=p1tree)
            board.push(chess.Move.from_uci(p1move))
            game_record[0].append(board.fen())
            game_record[1].append(deepcopy(pi))

            print(board)
            print('Player 1: ', move, '. ', p1move, ' | Q: ', Q, '\n', sep='')

            # Game ending conditions
            if board.is_game_over():
                if board.is_checkmate():
                    winner = 0
                    print('Winner: White.')
                else:
                    winner = -1
                    print('Game drawn.')
                print('Game duration:',
                      time.time() - game_start, 'seconds. \n')
                break

            if move != 1:
                # Update Player 2's decision tree with Player 1's move. On
                # the first move Player 2 has not searched yet.
                p2tree = p2tree.nodes[index]

            # Player 2 move
            print('Player 2 is thinking...')
            p2move, pi, p2tree, index, Q = mcts(board,
                                                poss_moves,
                                                pipes_sim,
                                                T=T,
                                                tree=p2tree)
            board.push(chess.Move.from_uci(p2move))
            game_record[0].append(board.fen())
            game_record[1].append(deepcopy(pi))

            print(board)
            print('Player 2: ', move, '... ', p2move, ' | Q: ', Q, '\n',
                  sep='')

            if board.is_game_over():
                if board.is_checkmate():
                    winner = 1
                    print('Winner: Black.')
                else:
                    winner = -1
                    print('Game drawn.')
                print('Game duration:', time.time() - game_start,
                      'seconds.\n')
                break

            # Check if game is over by length
            if move == 100:
                winner = -1
                print('Game drawn by length.')
                print('Game duration:', time.time() - game_start,
                      'seconds. \n')
                break

            # Update Player 1 decision tree with Player 2's move
            p1tree = p1tree.nodes[index]

            move += 1
    finally:
        # Kill neural network daemon, even if the game loop raised
        nn_p.terminate()

    # Delete final board state from game record: no move was chosen from
    # it, so it has no matching policy entry.
    del game_record[0][-1]

    # Assign rewards and penalties. Even indices are positions where
    # Player 1 moved, odd indices positions where Player 2 moved.
    positions = len(game_record[0])
    if winner == 0:
        # Reward Player 1, penalize Player 2
        Z = np.zeros(positions)
        Z[::2] = 1
        Z[1::2] = -1
    elif winner == 1:
        # Penalize Player 1, reward Player 2
        Z = np.zeros(positions)
        Z[::2] = -1
        Z[1::2] = 1
    else:
        # Slightly penalize draws
        Z = np.full(positions, -0.25)
    game_record[2].extend(Z.tolist())

    return game_record, winner