import collections
import copy
import os
import random
import shutil
# Gomoku, Player, gen_begin_board, prepare_train_data and load_data_h5 are
# assumed to be defined or imported elsewhere in this module.


def main():
    import argparse
    parser = argparse.ArgumentParser(description="Play the Gomoku Game!",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10,
                        help='Play a number of games to gather statistics.')
    parser.add_argument('-t', '--train_step', type=int, default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100,
                        help='Number of epochs for each training model')
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p', '--begin_lib_p', type=float, default=1.0,
                        help='Probability that the begin board library is used')
    parser.add_argument('--new', action='store_true', help='Start from a new model')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    from tf_model import get_new_model, load_existing_model, save_model
    if args.new:
        model = get_new_model()
    else:
        model = load_existing_model('initial_model/tf_model.h5')

    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow each AI to look into its opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    allstones = {(r, c) for r in range(1, 16) for c in range(1, 16)}
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board, replay=False):
        game.reset()
        player_A.reset()
        player_B.reset()
        if replay:
            game.board = copy.deepcopy(game.last_begin_board)
        else:
            if random.random() < args.begin_lib_p:
                game.board = gen_begin_board(allstones, begin_lib)
            else:
                game.board = gen_begin_board(allstones, None)
            # store the begin board
            game.last_begin_board = copy.deepcopy(game.board)
        # pick an arbitrary black stone to be the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)

    # find the last existing trained model folder so training can resume
    last_i_train = -1
    for i_train in range(args.n_train):
        model_name = "trained_model_%03d" % i_train
        if os.path.exists(model_name):
            print(f"Folder {model_name} exists")
            last_i_train = i_train
        else:
            break
    if last_i_train >= 0:
        # try to load the last trained model
        model_name = "trained_model_%03d" % last_i_train
        model_fnm = os.path.join(model_name, 'tf_model.h5')
        if os.path.exists(model_fnm):
            model = load_existing_model(model_fnm)
            print(f"Loaded trained model from {model_fnm}")
        else:
            # if the last trained model does not exist, load the previous one
            if last_i_train > 0:
                prev_model_name = f"trained_model_{last_i_train-1:03d}"
                prev_model_fnm = os.path.join(prev_model_name, 'tf_model.h5')
                model = load_existing_model(prev_model_fnm)
                print(f"Loaded latest model from {prev_model_fnm}")
            # try to reuse the saved data and finish the interrupted training
            train_data_fnm = os.path.join(model_name, 'data.h5')
            if os.path.exists(train_data_fnm):
                train_X, train_Y, train_W = load_data_h5(train_data_fnm)
                print(f"Training data loaded from {train_data_fnm}, start training")
                model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2,
                          shuffle=True, sample_weight=train_W)
                save_model(model, model_fnm)
                print("Model %s saved!" % model_name)
            else:
                # delete this folder and start again
                shutil.rmtree(model_name)
                print(f"Deleted folder {model_name}")
                last_i_train -= 1

    for i_train in range(last_i_train + 1, args.n_train):
        model_name = "trained_model_%03d" % i_train
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt', 'w') as game_output:
            replay_last_game = False
            for i_game in range(args.train_step):
                if replay_last_game:
                    print("Repeating the starting board of last game")
                playone(i_game, game_output, winner_board, replay=replay_last_game)
                # replay the same starting board if either player was surprised
                replay_last_game = any(player.strategy.surprised for player in game.players)
        print("Name | Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # collect training data
        train_X, train_Y, train_W = prepare_train_data(p1.strategy.learndata,
                                                       p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2,
                  shuffle=True, sample_weight=train_W)
        save_model(model, 'tf_model.h5')
        print("Model %s saved!" % model_name)
        os.chdir('..')
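
# The resume path above reads cached arrays with load_data_h5, which is
# defined elsewhere in the repo. Below is a minimal sketch of what it might
# look like, assuming the HDF5 file stores datasets named 'X', 'Y' and 'W'
# (features, targets, sample weights); the dataset names are assumptions,
# not the repo's confirmed layout.
def load_data_h5_sketch(filename):
    import h5py
    import numpy as np
    with h5py.File(filename, 'r') as f:
        train_X = np.array(f['X'])  # board features
        train_Y = np.array(f['Y'])  # training targets
        train_W = np.array(f['W'])  # per-sample weights
    return train_X, train_Y, train_W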
def main():
    import argparse
    parser = argparse.ArgumentParser(description="Play the Gomoku Game!",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10,
                        help='Play a number of games to gather statistics.')
    parser.add_argument('-t', '--train_step', type=int, default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100,
                        help='Number of epochs for each training model')
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p', '--begin_lib_p', type=float, default=1.0,
                        help='Probability that the begin board library is used')
    parser.add_argument('-b', '--benchmark', action='store_true',
                        help='Enable benchmark after each training model')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    # load model
    from tf_model import get_new_model, load_existing_model, save_model
    model = get_new_model()

    # find the last existing trained model folder so training can resume
    last_i_train = -1
    for i_train in range(args.n_train):
        model_name = "trained_model_%03d" % i_train
        if os.path.exists(model_name):
            print(f"Folder {model_name} exists")
            last_i_train = i_train
        else:
            break
    if last_i_train >= 0:
        # try to load the last trained model
        model_name = "trained_model_%03d" % last_i_train
        model_fnm = os.path.join(model_name, 'tf_model.h5')
        if os.path.exists(model_fnm):
            model = load_existing_model(model_fnm)
            print(f"Loaded trained model from {model_fnm}")
        else:
            # if the last trained model does not exist, load the previous one
            if last_i_train > 0:
                prev_model_name = f"trained_model_{last_i_train-1:03d}"
                prev_model_fnm = os.path.join(prev_model_name, 'tf_model.h5')
                model = load_existing_model(prev_model_fnm)
                print(f"Loaded latest model from {prev_model_fnm}")
            # try to reuse the saved data and finish the interrupted training
            train_data_fnm = os.path.join(model_name, 'data.h5')
            if os.path.exists(train_data_fnm):
                train_X, train_Y, train_W = load_data_h5(train_data_fnm)
                print(f"Training data loaded from {train_data_fnm}, start training")
                model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2)
                save_model(model, model_fnm)
                print("Model %s saved!" % model_name)
                # refresh the training by loading it back
                # model = load_existing_model(model_fnm)
            else:
                # delete this folder and start again
                shutil.rmtree(model_name)
                print(f"Deleted folder {model_name}")
                last_i_train -= 1

    from AIPlayer import AIPlayer
    player_A = AIPlayer('Black', model)
    player_B = AIPlayer('White', model)
    # link the players (allow each AI to look into its opponent's data)
    player_A.opponent = player_B
    player_B.opponent = player_A
    game.players = [player_A, player_B]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    allstones = {(r, c) for r in range(1, 16) for c in range(1, 16)}
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board, replay=False):
        game.reset()
        player_A.reset()
        player_B.reset()
        if replay:
            game.board = copy.deepcopy(game.last_begin_board)
        else:
            if random.random() < args.begin_lib_p:
                game.board = gen_begin_board(allstones, begin_lib)
            else:
                game.board = gen_begin_board(allstones, None)
            # store the begin board
            game.last_begin_board = copy.deepcopy(game.board)
        # pick an arbitrary black stone to be the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)
    for i_train in range(last_i_train + 1, args.n_train):
        model_name = "trained_model_%03d" % i_train
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = {p.name: 0 for p in game.players}
        winner_board['Draw'] = 0
        with open('game_results.txt', 'w') as game_output:
            replay_last_game = False
            i_game = 0
            repeating_n = 0
            repeat_n_after_surprise = 0
            while True:
                playone(i_game, game_output, winner_board, replay=replay_last_game)
                surprised = any(player.surprised for player in game.players)
                if surprised:
                    replay_last_game = True
                    repeat_n_after_surprise = 0
                elif repeat_n_after_surprise < 5:
                    # keep replaying this board until 5 replays pass since the last surprise
                    replay_last_game = True
                else:
                    replay_last_game = False
                if replay_last_game:
                    repeating_n += 1
                    repeat_n_after_surprise += 1
                    print(f"Game {i_game} repeating {repeating_n} | "
                          f"{repeat_n_after_surprise}: {game.last_begin_board}")
                else:
                    repeating_n = 0
                    repeat_n_after_surprise = 0
                    i_game += 1
                    if i_game >= args.train_step:
                        break
                    print(f"New game {i_game}: {game.last_begin_board}")
                # prevent memory overflow and getting killed
                if len(player_A.learndata) > 3_000_000:
                    print('Learn data is full, stopping')
                    break
        print("Name | Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # reset player cache
        player_A.reset_cache()
        player_B.reset_cache()
        # collect training data
        train_X, train_Y, train_W = prepare_train_data(player_A.learndata,
                                                       player_B.learndata)
        # fit the model
        model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2)
        save_model(model, 'tf_model.h5')
        print("Model %s saved!" % model_name)
        # refresh the training by loading it back
        # model = load_existing_model('tf_model.h5')
        # player_A.model = player_B.model = model
        os.chdir('..')

        if args.benchmark and i_train > 0:
            prev_model_name = f"trained_model_{i_train-1:03d}"
            prev_model_fnm = os.path.join(prev_model_name, 'tf_model.h5')
            prev_model = load_existing_model(prev_model_fnm)
            os.chdir(model_name)
            with open('benchmark.txt', 'w') as game_output:
                # play 100 games with the new model as Black
                print("New model as Black", file=game_output)
                player_A.model, player_B.model = model, prev_model
                winner_board_b = {p.name: 0 for p in game.players}
                winner_board_b['Draw'] = 0
                for i_game in range(100):
                    playone(i_game, game_output, winner_board_b)
                print("Name | Games Won", file=game_output)
                for name, nwin in winner_board_b.items():
                    print("%-7s | %7d" % (name, nwin), file=game_output)
                # play 100 games with the new model as White
                print("New model as White", file=game_output)
                player_A.model, player_B.model = prev_model, model
                winner_board_w = {p.name: 0 for p in game.players}
                winner_board_w['Draw'] = 0
                for i_game in range(100):
                    playone(i_game, game_output, winner_board_w)
                print("Name | Games Won", file=game_output)
                for name, nwin in winner_board_w.items():
                    print("%-7s | %7d" % (name, nwin), file=game_output)
                # summary table for the new model
                print('\n\n' + '-' * 50, file=game_output)
                print('          |        Win       Lose       Draw', file=game_output)
                print('-' * 50, file=game_output)
                print(f' as Black | {winner_board_b[player_A.name]:10} '
                      f'{winner_board_b[player_B.name]:10} {winner_board_b["Draw"]:10}',
                      file=game_output)
                print(f' as White | {winner_board_w[player_B.name]:10} '
                      f'{winner_board_w[player_A.name]:10} {winner_board_w["Draw"]:10}',
                      file=game_output)
                print('-' * 50, file=game_output)
            os.chdir('..')

        # refresh the training by loading it back
        model_fnm = os.path.join(model_name, 'tf_model.h5')
        model = load_existing_model(model_fnm)
        player_A.model = player_B.model = model
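
# playone relies on gen_begin_board, defined elsewhere in the repo, to
# produce a starting position. Below is a minimal hypothetical sketch under
# two assumptions: the board is a (black_stones, white_stones) pair of
# coordinate sets, and begin_lib is a sequence of such pre-built openings.
# The real generator may differ; this sketch only mirrors how the result is
# used above (game.board[0] is iterated to pick a black stone as last move).
def gen_begin_board_sketch(allstones, begin_lib=None):
    import copy
    import random
    if begin_lib:
        # draw a stored opening so games start from known positions
        return copy.deepcopy(random.choice(begin_lib))
    # otherwise scatter three random stones, black keeping one extra move
    stones = random.sample(sorted(allstones), 3)
    return set(stones[:2]), set(stones[2:])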
def main():
    import argparse
    parser = argparse.ArgumentParser(description="Play the Gomoku Game!",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10,
                        help='Play a number of games to gather statistics.')
    parser.add_argument('-t', '--train_step', type=int, default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100,
                        help='Number of epochs for each training model')
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p', '--begin_lib_p', type=float, default=1.0,
                        help='Probability that the begin board library is used')
    parser.add_argument('--new', action='store_true', help='Start from a new model')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    from tf_model import get_new_model, load_existing_model, save_model
    if args.new:
        model = get_new_model()
    else:
        model = load_existing_model('initial_model/tf_model.h5')

    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow each AI to look into its opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    allstones = {(r, c) for r in range(1, 16) for c in range(1, 16)}
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board):
        game.reset()
        player_A.reset()
        player_B.reset()
        if random.random() < args.begin_lib_p:
            game.board = gen_begin_board(allstones, begin_lib)
        else:
            game.board = gen_begin_board(allstones, None)
        # pick an arbitrary black stone to be the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)
    for i_train in range(args.n_train):
        # check that the current model does not already exist
        model_name = "trained_model_%03d" % i_train
        assert not os.path.exists(model_name), \
            "Current model %s already exists!" % model_name
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt', 'w') as game_output:
            for i_game in range(args.train_step):
                playone(i_game, game_output, winner_board)
        print("Name | Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # collect training data
        train_X, train_Y = prepare_train_data(p1.strategy.learndata,
                                              p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        model.fit(train_X, train_Y, epochs=args.n_epoch,
                  validation_split=0.2, shuffle=True)
        save_model(model, 'tf_model.h5')
        print("Model %s saved!" % model_name)
        os.chdir('..')
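
# The script defines main() but nothing above invokes it, so the standard
# entry-point guard is needed for it to run when executed directly.
if __name__ == '__main__':
    main()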