def run_game(b1_role, mcts_role, b1_N, mcts_N, model, rand=0):
    """Play one game between a B1Node player and an MCTSNode player.

    Both search trees are rooted at the module-level ``propnet`` / ``data``
    and are advanced in lockstep with the joint move chosen each step. A
    6-row by 8-column text board is kept purely for display and is printed
    top row first.

    Args:
        b1_role: Sequence of role names controlled by the B1 player.
        mcts_role: Sequence of role names controlled by the MCTS player.
        b1_N: Value forwarded to ``choose_move`` for the B1 player.
        mcts_N: Value forwarded to ``choose_move`` for the MCTS player.
        model: Model handed to ``B1Node``.
        rand: While the step index is below this value, ``choose_move`` is
            called with its final argument set to True.

    Returns:
        A tuple ``(b1_total, mcts_total)`` holding the final scores summed
        over each player's roles.
    """
    print(f'b1 ({" ".join(b1_role)}), N={b1_N}')
    print(f'mcts ({" ".join(mcts_role)}), N={mcts_N}')

    curb1 = B1Node(propnet, data, model=model)
    curmcts = MCTSNode(propnet, data)
    board = [['.'] * 8 for _ in range(6)]

    def show_board():
        # Row 0 is the bottom of the board, so print the rows reversed.
        print('\n'.join(''.join(cells) for cells in reversed(board)))

    for step in range(1000):
        show_board()

        # Both players pick from the legal moves of the same position.
        legal = curb1.propnet.legal_moves_dict(curb1.data)
        explore = step < rand
        b1_moves = choose_move(curb1, b1_role, b1_N, legal, explore)
        mcts_moves = choose_move(curmcts, mcts_role, mcts_N, legal, explore)

        # Merge the per-player choices, then order them by propnet.roles.
        taken_moves = dict(zip(b1_role, b1_moves))
        taken_moves.update(zip(mcts_role, mcts_moves))
        moves = tuple(taken_moves[role] for role in propnet.roles)

        curb1 = curb1.get_or_make_child(moves)
        curmcts = curmcts.get_or_make_child(moves)

        print('Moves were:')
        for move in propnet.legal:
            if move.id not in moves or move.move_gdl.strip() == 'noop':
                continue
            print(move.move_role, move.move_gdl)
            if 'drop' not in move.move_gdl:
                continue
            # The third whitespace-separated token is the 1-based column.
            col = int(move.move_gdl.split()[2]) - 1
            # Fill the lowest empty cell in that column, if there is one.
            for cells in board:
                if cells[col] == '.':
                    cells[col] = move.move_role[0]
                    break

        if curb1.terminal:
            show_board()
            break

    print('Results:', curb1.scores)
    b1_total = sum(curb1.scores[role] for role in b1_role)
    mcts_total = sum(curb1.scores[role] for role in mcts_role)
    return (b1_total, mcts_total)
def play(self, endtime):
    """Search from a fresh root and return the most-visited move for our role.

    Runs a fixed 500 calls to ``simulation`` on a new ``B1Node``, prints the
    root, and picks the move id with the highest count in
    ``root.move_counts[self.role]`` (the first one wins on ties).

    Args:
        endtime: Accepted for interface compatibility; not consulted here.

    Returns:
        The ``move_gdl`` of the selected move.
    """
    root = B1Node(self.propnet, model=self.model)
    for _ in range(500):
        simulation(root)
    root.print_node()

    # Strict ">" keeps the earliest entry among equally visited moves.
    top_count = -1
    choice = None
    for move_id, visits in root.move_counts[self.role].items():
        if not visits > top_count:
            continue
        top_count = visits
        choice = move_id

    move = self.propnet.id_to_move[choice].move_gdl
    print('Made move', move)
    return move
import tensorflow as tf
import re
import pickle

# Script: load a saved checkpoint for <game>, play self-play games with it,
# and pickle the model's buffer under buffers/.
#
# Usage: transfer.py <game> <ckpt> <total_rounds>
if len(sys.argv) < 4:
    print("Usage: transfer.py <game> <ckpt> <total_rounds>")
    # Stop here: without all three arguments the indexing below would fail
    # with an IndexError right after the usage text.
    sys.exit(1)

game = sys.argv[1]
expertckpt = sys.argv[2]
totalRounds = int(sys.argv[3])

data, propnet = load_propnet(game)
m = Model(propnet, create=False)
m.load('./models/' + game + '/step-%06d.ckpt' % int(expertckpt))

# The output path does not change between games, so build it once.
buffer_path = ("buffers/" + game + expertckpt + "for" + str(totalRounds) +
               "rounds.p")

cur = [None]
start = time.time()
gameIndex = 0
for i in range(totalRounds + 1):
    # Each game starts from a fresh root node.
    cur[0] = B1Node(propnet, data, model=m)
    print('Game number', i)
    start_game = time.time()
    do_game(cur, propnet, m, z=0.5)
    print("took ", time.time() - start_game, "seconds to play game")

    # NOTE(review): the buffer is rewritten after every game; confirm that
    # per-game dumping (rather than a single dump after the loop) is wanted.
    buffer = m.getBuffer()
    # The context manager closes the file even if pickling raises.
    with open(buffer_path, "wb") as buffer_file:
        pickle.dump(buffer, buffer_file)
# Initialise the model from the source game's checkpoint, optionally clear
# its output layer, then train on self-play games of the target game.
source_ckpt = './models/' + from_game+ '/step-%06d.ckpt' % int(ckpt)
model.perform_transfer(
    source_ckpt,
    reuse_output=reuse,
    pad_0=zeroPad,
    breakthroughMap=breakthroughMap,
)
if clear:
    model.clear_output_layer()

## train
cur = [None]
pauser_state = {
    'cur': cur,
    'model': model,
    'propnet': new_prop,
}
set_pauser(pauser_state)

model_name = to_game + "from" + from_game + ckpt + mode
# Save the freshly transferred model as step 0 before any training.
model.save(model_name, 0, transfer=True)

start = time.time()
for i in range(int(train_to)):
    cur[0] = B1Node(new_prop, data, model=model)
    print('Game number', i)

    start_game = time.time()
    do_game(cur, new_prop, model, z=0.5)
    game_seconds = time.time() - start_game
    print("took ", game_seconds, "seconds to play game")

    start_train = time.time()
    model.train(epochs=10)
    train_seconds = time.time() - start_train
    print("took ", train_seconds, "seconds to train")

    # Every 50th game (never game 0): save a checkpoint and append the
    # elapsed wall-clock time since training began.
    if i > 0 and i % 50 == 0:
        model.save(model_name, i, transfer=True)
        with open(f'models_transfer/times-{to_game}', 'a') as f:
            f.write(f'{i} {time.time()-start}\n')
# Rendered captions for the "You won!" and "Draw!" outcomes (black text).
human_win = myfont.render("You won!", False, (0, 0, 0))
draw = myfont.render("Draw!", False, (0, 0, 0))

# Window setup.
size = [1550, 1000]
screen = pygame.display.set_mode(size)
pygame.display.set_caption('Connect-4 against AlphaZero')
clock = pygame.time.Clock()

# Each side's board marker is the first character of its role name.
HUMAN = your_role[0]
# The computer takes a role from propnet.roles other than the human's
# (presumably exactly two roles exist -- confirm).
my_role = list(set(propnet.roles) - {your_role})[0]
COMPUTER = my_role[0]
left_player, right_player = human_text, a0_text

N = 500  # NOTE(review): presumably the search budget per computer move -- confirm
col = None
done = False
cur = B1Node(propnet, data, model=model)
# Board: 6 rows of 7 cells, '.' marks an empty cell.
state = [list('.' * 7) for i in range(6)]

# When the computer plays 'white' it makes the first move before the loop.
if my_role == 'white':
    show_state(state)
    move = 3  # hard-coded opening column; get_computer_move() is not called
    drop(state, move, COMPUTER)
    moves = make_moves(move, None)
    cur = cur.get_or_make_child(moves)
    # Swap the two player labels.
    left_player, right_player = right_player, left_player

# Main loop: runs until a QUIT event sets `done`.
while not done:
    for event in pygame.event.get():  # User did something
        if event.type == pygame.QUIT:  # If user clicked close
            done = True