def train():
    # Iterate over all GMdata files in random order
    training_set = 1
    files = list(range(1, 287))
    shuffle(files)
    for file_num in files:
        print('Training set:', training_set)
        kf = KFold(n_splits=15, shuffle=True)
        early = EarlyStopping(monitor='val_activation_37_loss',
                              min_delta=0.001, patience=10, verbose=1)
        tensorboard = TensorBoard(log_dir=r'.\TensorBoard')
        filename = r'.\Data\GMdata' + str(file_num) + '.pickle'
        with open(filename, 'rb') as file_object:
            game_record = pickle.load(file_object)
        # KFold serves as a subsampler here: only the first fold's held-out
        # 1/15th of the file is trained on before breaking out of the loop
        for ignore_, train_idx in kf.split(game_record[0]):
            del ignore_
            feats = np.zeros((len(train_idx), 14, 8, 8))
            pis = np.zeros((len(train_idx), 1968))
            results = np.zeros((len(train_idx), 1))
            count = 0
            for index in train_idx:
                feats[count] = features(game_record[0][index])
                pis[count] = game_record[1][index]
                results[count] = game_record[2][index]
                count += 1
            # Reset the Keras TF session before loading the model so GPU
            # memory doesn't accumulate across repeated load_model() calls
            ktf.set_session(get_session())
            model = load_model(filepath=r'.\models\model_train.h5')
            model.fit(feats, [pis, results], batch_size=200, epochs=300,
                      verbose=2, callbacks=[early, tensorboard],
                      validation_split=0.3)
            model.save(filepath=r'.\models\model_train.h5')
            del model
            break
        del game_record
        print('Training epoch:', training_set, 'completed. Training set:',
              file_num, '\n')
        training_set += 1
        gc.collect()
        time.sleep(15)
    print('Training complete.')
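# train() resets the Keras session via ktf.set_session(get_session()) before
# every load_model() call, the usual TF1-era trick for keeping GPU memory
# bounded when a model is repeatedly reloaded in a loop. get_session() is not
# shown in this listing; the sketch below is an assumed reconstruction,
# suffixed _sketch to avoid shadowing the real helper, and the gpu_fraction
# default is illustrative.
import tensorflow as tf

def get_session_sketch(gpu_fraction=0.6):
    """Return a TF1 session that grows GPU memory on demand instead of
    pre-allocating the whole card."""
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction,
                                allow_growth=True)
    return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))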
def iteration(tree, board, C, poss_moves, pipe_sim, **kwargs):
    node = tree
    search_depth = kwargs.get('search_depth', 50)  # 25 fullmoves
    state = deepcopy(board)
    is_winner = False
    state_ = 0
    state_indices = []
    nodes = [node]
    depth = 0

    # Traverse tree until end of game or search depth is reached
    while not is_winner and depth < search_depth:
        # Generate legal moves in position
        legal = sorted([move.uci() for move in state.generate_legal_moves()])

        # Select move
        if len(legal) == 1:
            # If only one legal move, select that move
            index = 0
            state.push(chess.Move.from_uci(legal[0]))
        else:
            # Select move using PUCT equation. See: tools.get_move()
            edges = node_edges(node, legal)
            index = get_move(edges, C)
            move = legal[index]
            state.push(chess.Move.from_uci(move))

        # Update evaluation node
        node = node.nodes[index]
        nodes.append(node)

        # Evaluate and expand
        legal = sorted([move.uci() for move in state.generate_legal_moves()])
        indices = [poss_moves.index(move) for move in legal]
        # Evaluate the position just reached (state), not the root board
        feats = features(state.fen())
        feats = feats.reshape(1, 14, 8, 8)
        pipe_sim.send(feats)
        pipe_sim.poll(timeout=None)
        priors, value = pipe_sim.recv()
        for move, san in enumerate(legal):  # san holds UCI strings
            nodes_update(node, priors, indices, move, san)

        # Bookkeeping
        is_winner = state.is_game_over()
        state_indices.append(index)
        state_ += 1
        depth += 1

    # Backup: propagate the leaf value along the traversed path
    while state_ > 0:
        nodes = nodes[:-1]
        node = nodes[-1]
        i = state_indices[state_ - 1]
        nodes_backup(node, i, value)
        state_ -= 1

    return tree
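# iteration() delegates to tree helpers (node_edges, get_move, nodes_backup)
# defined elsewhere in the project. The sketches below are assumed
# reconstructions, suffixed _sketch so they don't shadow the real imports.
# They take each node's statistics to be data = [N, W, Q, P], which matches
# the indexing used throughout this file (data[0] visits, data[2] Q,
# data[3] prior); the W slot at data[1] is an assumption.
import numpy as np

def get_move_sketch(edges, C):
    """Pick the edge maximizing the PUCT score Q + C*P*sqrt(sum(N))/(1+N)."""
    edges = np.asarray(edges, dtype=float)    # rows of [N, W, Q, P]
    N, Q, P = edges[:, 0], edges[:, 2], edges[:, 3]
    U = C * P * np.sqrt(N.sum()) / (1.0 + N)  # exploration bonus
    scores = Q + U
    # Break ties randomly so parallel workers don't all follow the same line
    return int(np.random.choice(np.flatnonzero(scores == scores.max())))

def nodes_backup_sketch(node, i, value):
    """Fold a leaf value into the i-th child: N += 1, W += value, Q = W/N."""
    child = node.nodes[i]
    child.data[0] += 1                              # N: visit count
    child.data[1] += value                          # W: total value
    child.data[2] = child.data[1] / child.data[0]   # Q: mean value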
def main():
    from keras.models import load_model
    from keras.callbacks import EarlyStopping

    file_name = r'.\records\brownie24_self_play_records.pickle'
    with open(file_name, 'rb') as file_object:
        game_records = pickle.load(file_object)

    early = EarlyStopping(patience=20, verbose=0)
    sample_size = 3000
    for training_epoch in range(100):
        print('Training epoch:', training_epoch, '\n')
        ws = np.zeros((sample_size, 8, 8, 1))
        bs = np.zeros((sample_size, 8, 8, 1))
        ps = np.zeros((sample_size, 1))
        pis = np.zeros((sample_size, 1968))
        results = np.zeros((sample_size, 1))
        # Sample positions uniformly, with replacement, from the records
        sample_indices = np.random.randint(0, high=len(game_records[0]),
                                           size=sample_size)
        for sample in range(sample_size):
            # Note: here features() is unpacked into separate white, black,
            # and side-to-move planes, unlike the single 14x8x8 stack used
            # elsewhere in this file
            w, b, p = features(game_records[0][sample_indices[sample]])
            ws[sample] = w
            bs[sample] = b
            ps[sample] = p
            pis[sample] = game_records[1][sample_indices[sample]]
            results[sample] = game_records[2][sample_indices[sample]]
        training_model = load_model(filepath=r'.\models\model_train.h5')
        training_model.fit([ws, bs, ps], [pis, results], batch_size=200,
                           epochs=300, verbose=2, callbacks=[early],
                           validation_split=0.3)
        training_model.save(filepath=r'.\models\model_train.h5')
        del training_model
def mcts(board, poss_moves, pipes_sim, **kwargs):
    start = time.time()
    C = kwargs.get('C', 1.4)
    thinking_time = kwargs.get('thinking_time', 10)
    T = kwargs.get('T', 0.0001)
    tree = kwargs.get('tree', SearchTree())
    state = deepcopy(board)
    legal = sorted([move.uci() for move in board.generate_legal_moves()])
    sleep = 1e-9

    feats = features(board.fen())
    feats = feats.reshape(1, 14, 8, 8)
    pipes_sim[0].send(feats)
    pipes_sim[0].poll(timeout=None)
    priors, value = pipes_sim[0].recv()

    # Add Dirichlet noise to priors in root node: draw 2 * len(priors)
    # Dirichlet(0.03, 0.03) samples, flatten, and keep the first len(priors)
    noise = np.ravel(
        np.random.dirichlet([0.03, 0.03], size=len(priors)).reshape(1, -1))
    noise = noise[:len(priors)]
    epsilon = 0.25
    priors = ((1 - epsilon) * priors) + (epsilon * noise)

    # Create a node for each legal move, skipping moves whose nodes already
    # exist (a for/else so create_node only runs when no match was found)
    indices = [poss_moves.index(move) for move in legal]
    for move, san in zip(range(len(legal)), legal):
        for child_node in tree.nodes:
            if child_node.name == san:
                # Add Dirichlet noise to the prior of the existing node
                prior = child_node.data[3]
                noise = np.random.dirichlet([0.03, 0.03])
                child_node.data[3] = ((1 - epsilon) * prior) + \
                                     (epsilon * noise[0])
                break
        else:
            tree.create_node(P=priors[indices[move]], name=san)

    # While elapsed time < thinking time, search tree in parallel workers
    tree_queue = Queue()
    sim_queue = Queue()
    sims = []
    for worker in range(len(pipes_sim)):
        pipe_sim = pipes_sim[worker]
        sim = Process(target=parallel_simulation,
                      args=(tree, state, C, poss_moves, pipe_sim, start,
                            thinking_time, tree_queue, sim_queue))
        sims.append(sim)
        sim.start()
    trees = []
    simulations = []
    while len(simulations) < len(sims):
        trees.append(tree_queue.get())
        simulations.append(sim_queue.get())
        time.sleep(sleep)
    print('Simulations:', sum(simulations), '| Thinking time:',
          time.time() - start, 'seconds.')

    # Update master tree nodes based on best Q-values of simulated tree nodes
    for node in range(len(tree.nodes)):
        Q_values = np.zeros(len(trees))
        for tree_ in range(len(trees)):
            Q_values[tree_] = trees[tree_].nodes[node].data[2]
        best = np.random.choice(np.where(Q_values == Q_values.max())[0])
        tree.nodes[node] = trees[best].nodes[node]

    # Select move
    visits = [tree.nodes[move].data[0] for move in range(len(legal))]
    if sum(visits) > 0:
        probs = get_pi(visits, T)
        pi = np.zeros(priors.shape)
        for index, probability in zip(indices, probs):
            pi[index] = probability
        move = np.random.choice(legal, p=probs)
    else:
        pi = priors
        move = np.random.choice(legal)
    pi_move = pi[poss_moves.index(move)]
    index = legal.index(move)
    print('MSE:', mse(pi, priors))

    # Prune tree for reuse in future searches
    tree = tree.nodes[index]

    # If all simulated Q values were negative, seed leaves for new position
    if tree.data[2] == 0:
        state.push(chess.Move.from_uci(move))
        # Evaluate the position after the move (state), not the root board
        feats = features(state.fen())
        feats = feats.reshape(1, 14, 8, 8)
        pipes_sim[0].send(feats)
        pipes_sim[0].poll(timeout=None)
        priors, value = pipes_sim[0].recv()
        legal = [move_.uci() for move_ in state.generate_legal_moves()]
        indices = [poss_moves.index(move_) for move_ in legal]
        for move_, san in zip(range(len(legal)), legal):
            nodes_update(tree, priors, indices, move_, san)

    print('N:', tree.data[0], '| P:', tree.data[3], '| Pi:', pi_move)
    return move, pi, tree, index, tree.data[2]
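# mcts() converts visit counts to a play distribution with get_pi() and
# reports mse() between the search policy and the raw priors; neither helper
# appears in this listing. The sketches below follow the AlphaZero convention
# pi(a) ~ N(a) ** (1 / T), max-normalizing first so the tiny default
# T = 0.0001 cannot overflow. Both are assumed reconstructions, suffixed
# _sketch so they don't shadow the real imports.
import numpy as np

def get_pi_sketch(visits, T):
    """Visit counts -> move probabilities. Low T sharpens play toward the
    most-visited move; T = 1 samples in proportion to raw counts."""
    counts = np.asarray(visits, dtype=float)
    scaled = (counts / counts.max()) ** (1.0 / T)  # max-normalized first
    return scaled / scaled.sum()

def mse_sketch(pi, priors):
    """Mean squared error between the search policy and network priors."""
    return float(np.mean((np.asarray(pi) - np.asarray(priors)) ** 2))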
def evaluation():
    train_color = randint(0, 1)
    if train_color == 0:
        model1path = r'.\models\model_train.h5'
        model2path = r'.\models\model_live.h5'
        print('Evaluation network plays as White. Current generator network '
              'plays as Black.')
        player_1 = 'Evaluator'
        player_2 = 'Generator'
    else:
        model1path = r'.\models\model_live.h5'
        model2path = r'.\models\model_train.h5'
        print('Current generator network plays as White. Evaluation network '
              'plays as Black.')
        player_1 = 'Generator'
        player_2 = 'Evaluator'

    # Initialize neural network daemon for both players
    pipes_net1 = []
    pipes_sim1 = []
    pipes_net2 = []
    pipes_sim2 = []
    for worker in range(cpu_count() - 2):
        # Player 1 pipes
        p1, p2 = Pipe()
        pipes_net1.append(p1)
        pipes_sim1.append(p2)
        # Player 2 pipes
        p3, p4 = Pipe()
        pipes_net2.append(p3)
        pipes_sim2.append(p4)
    nn_p1 = Process(target=nn_daemon, args=(model1path, pipes_net1))
    nn_p2 = Process(target=nn_daemon, args=(model2path, pipes_net2))
    nn_p1.daemon = True
    nn_p2.daemon = True
    nn_p1.start()
    nn_p2.start()

    # Initialize board and game variables
    poss_moves = all_possible_moves()
    board = chess.Board()
    p1tree = SearchTree()
    p2tree = SearchTree()
    move = 1
    T = 0.1  # Temperature coefficient is low for the entire evaluation

    # Warm up both daemons with the starting position
    feats = features(board.fen())
    feats = feats.reshape(1, 14, 8, 8)
    pipes_sim1[0].send(feats)
    pipes_sim2[0].send(feats)
    while not pipes_sim1[0].poll():
        time.sleep(1e-7)
    while not pipes_sim2[0].poll():
        time.sleep(1e-7)
    prior_, value_ = pipes_sim1[0].recv()
    prior_, value_ = pipes_sim2[0].recv()
    del prior_
    del value_

    # Play game and record board state features for each move
    print('Game start.')
    while True:
        # Player 1 move
        print(player_1, 'is thinking...')
        p1move, pi, p1tree, index, Q = mcts(board, poss_moves, pipes_sim1,
                                            T=T, tree=p1tree)
        board.push(chess.Move.from_uci(p1move))
        print(board)
        print(player_1, ': ', move, '. ', p1move, ' | Q: ', Q, '\n', sep='')

        # Game ending conditions
        if board.is_game_over():
            if board.is_checkmate():
                winner = 0
                print('Winner:', player_1)
                break
            else:
                winner = -1
                print('Game drawn.')
                break

        if move != 1:
            # Update Player 2's decision tree with Player 1's move
            p2tree = p2tree.nodes[index]

        # Player 2 move
        print(player_2, 'is thinking...')
        p2move, pi, p2tree, index, Q = mcts(board, poss_moves, pipes_sim2,
                                            T=T, tree=p2tree)
        board.push(chess.Move.from_uci(p2move))
        print(board)
        print(player_2, ': ', move, '... ', p2move, ' | Q: ', Q, '\n', sep='')

        if board.is_game_over():
            if board.is_checkmate():
                winner = 1
                print('Winner:', player_2)
                break
            else:
                winner = -1
                print('Game drawn.')
                break

        # Check if game is over by length
        if move == 100:
            winner = -1
            print('Game drawn by length.')
            break

        # Update Player 1's decision tree with Player 2's move
        p1tree = p1tree.nodes[index]
        move += 1

    # Kill neural network daemons
    nn_p1.terminate()
    nn_p2.terminate()

    # Determine the trained network's performance
    if winner == -1:
        return 0.5
    elif winner == train_color:
        return 1
    else:
        return 0
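# evaluation() scores one game from the trained network's perspective
# (1 win, 0.5 draw, 0 loss), but the promotion logic that consumes the score
# is not shown here. Below is a hypothetical driver in the AlphaZero style;
# the evaluate_generation name, the 10-game sample, and the 55% gate are all
# assumptions, not the project's actual values.
import shutil

def evaluate_generation(n_games=10, threshold=0.55):
    """Play n_games evaluation games; promote model_train if its average
    score reaches the threshold."""
    score = sum(evaluation() for _ in range(n_games)) / n_games
    print('Evaluation score:', score)
    if score >= threshold:
        shutil.copy(r'.\models\model_train.h5', r'.\models\model_live.h5')
    return score

# multiprocessing re-imports this module in every child process on Windows,
# so an entry point must sit behind a __main__ guard:
if __name__ == '__main__':
    evaluate_generation()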
def self_play():
    game_start = time.time()

    # Initialize neural network daemon
    modelpath = r'.\models\model_live.h5'
    pipes_net = []
    pipes_sim = []
    for worker in range(cpu_count() - 1):
        p1, p2 = Pipe()
        pipes_net.append(p1)
        pipes_sim.append(p2)
    nn_p = Process(target=nn_daemon, args=(modelpath, pipes_net))
    nn_p.daemon = True
    nn_p.start()

    # Initialize board and begin recording features for each board state
    poss_moves = all_possible_moves()
    board = chess.Board()
    game_record = [[board.fen()], [], []]
    p1tree = SearchTree()
    p2tree = SearchTree()
    move = 1
    T = 1

    # Warm up the daemon with the starting position
    feats = features(board.fen())
    feats = feats.reshape(1, 14, 8, 8)
    pipes_sim[0].send(feats)
    while not pipes_sim[0].poll():
        time.sleep(1e-7)
    prior_, value_ = pipes_sim[0].recv()
    del prior_
    del value_

    # Play game and record board state features for each move
    print('Game start.')
    while True:
        # Lower the temperature coefficient after the opening
        if move > 10:
            T = 0.1

        # Player 1 move
        print('Player 1 is thinking...')
        p1move, pi, p1tree, index, Q = mcts(board, poss_moves, pipes_sim,
                                            T=T, tree=p1tree)
        board.push(chess.Move.from_uci(p1move))
        game_record[0].append(board.fen())
        game_record[1].append(deepcopy(pi))
        print(board)
        print('Player 1: ', move, '. ', p1move, ' | Q: ', Q, '\n', sep='')

        # Game ending conditions
        if board.is_game_over():
            if board.is_checkmate():
                winner = 0
                print('Winner: White.')
                print('Game duration:', time.time() - game_start,
                      'seconds.\n')
                break
            else:
                winner = -1
                print('Game drawn.')
                print('Game duration:', time.time() - game_start,
                      'seconds.\n')
                break

        if move != 1:
            # Update Player 2's decision tree with Player 1's move
            p2tree = p2tree.nodes[index]

        # Player 2 move
        print('Player 2 is thinking...')
        p2move, pi, p2tree, index, Q = mcts(board, poss_moves, pipes_sim,
                                            T=T, tree=p2tree)
        board.push(chess.Move.from_uci(p2move))
        game_record[0].append(board.fen())
        game_record[1].append(deepcopy(pi))
        print(board)
        print('Player 2: ', move, '... ', p2move, ' | Q: ', Q, '\n', sep='')

        if board.is_game_over():
            if board.is_checkmate():
                winner = 1
                print('Winner: Black.')
                print('Game duration:', time.time() - game_start,
                      'seconds.\n')
                break
            else:
                winner = -1
                print('Game drawn.')
                print('Game duration:', time.time() - game_start,
                      'seconds.\n')
                break

        # Check if game is over by length
        if move == 100:
            winner = -1
            print('Game drawn by length.')
            print('Game duration:', time.time() - game_start, 'seconds.\n')
            break

        # Update Player 1's decision tree with Player 2's move
        p1tree = p1tree.nodes[index]
        move += 1

    # Kill neural network daemon
    nn_p.terminate()

    # Delete final board state from game record
    del game_record[0][-1]

    # Assign rewards and penalties
    if winner == 0:
        # Reward Player 1, penalize Player 2
        Z = np.zeros(len(game_record[0]))
        Z[::2] = 1
        Z[1::2] = -1
    elif winner == 1:
        # Penalize Player 1, reward Player 2
        Z = np.zeros(len(game_record[0]))
        Z[::2] = -1
        Z[1::2] = 1
    else:
        # Slightly penalize draws
        Z = np.full(len(game_record[0]), -0.25)
    game_record[2].extend(Z.tolist())

    return game_record, winner
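# A concrete view of the reward assignment at the end of self_play():
# game_record[0] alternates White-to-move and Black-to-move positions
# (starting from the initial position, final state dropped), so the even
# indices are the positions with White (Player 1) to move. The demo below
# uses a hypothetical five-position record.
import numpy as np

def _reward_vector_demo(n=5):
    """Show the three Z vectors for a hypothetical n-position game record."""
    Z = np.zeros(n)
    Z[::2], Z[1::2] = 1, -1
    print(Z)                   # [ 1. -1.  1. -1.  1.]  White won
    Z = np.zeros(n)
    Z[::2], Z[1::2] = -1, 1
    print(Z)                   # [-1.  1. -1.  1. -1.]  Black won
    print(np.full(n, -0.25))   # draws slightly penalized at every position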