Code Example #1
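These excerpts show only the main() function and rely on the surrounding module for several names. The standard-library imports below are certain from the code itself; Gomoku, Player, gen_begin_board, prepare_train_data, and load_data_h5 are project helpers defined elsewhere in the repository:

import collections
import copy
import os
import random
import shutil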
def main():
    import argparse
    parser = argparse.ArgumentParser("Play the Gomoku Game!", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10, help='Play a number of games to gather statistics.')
    parser.add_argument('-t', '--train_step', type=int, default=100, help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100, help="Number of epochs for each training model")
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p', '--begin_lib_p', type=float, default=1.0, help='Possibility of begin lib to be used')
    parser.add_argument('--new', action='store_true', help='Start from new model')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    from tf_model import get_new_model, load_existing_model, save_model
    if args.new:
        model = get_new_model()
    else:
        model = load_existing_model('initial_model/tf_model.h5')

    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow AI to look into opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()


    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    allstones = {(r, c) for r in range(1, 16) for c in range(1, 16)}
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board, replay=False):
        game.reset()
        player_A.reset()
        player_B.reset()
        if replay:
            game.board = copy.deepcopy(game.last_begin_board)
        else:
            if random.random() < args.begin_lib_p:
                game.board = gen_begin_board(allstones, begin_lib)
            else:
                game.board = gen_begin_board(allstones, None)
            # store the begin board
            game.last_begin_board = copy.deepcopy(game.board)
        # assign an arbitrary black stone as the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n'%(i+1, winner))
        game_output.flush()

    print("Training the model for %d iterations."%args.n_train)

    last_i_train = -1
    for i_train in range(args.n_train):
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        if os.path.exists(model_name):
            print(f"Folder {model_name} exists")
            last_i_train = i_train
        else:
            break
    if last_i_train >= 0:
        # try to load the last trained model
        model_name = "trained_model_%03d" % last_i_train
        model_fnm = os.path.join(model_name, 'tf_model.h5')
        if os.path.exists(model_fnm):
            model = load_existing_model(model_fnm)
            print(f"Loaded trained model from {model_fnm}")
        else:
            # if the last trained model doesn't exist, load the previous one
            if last_i_train > 0:
                prev_model_name = f"trained_model_{last_i_train-1:03d}"
                prev_model_fnm = os.path.join(prev_model_name, 'tf_model.h5')
                model = load_existing_model(prev_model_fnm)
                print(f"Loaded latest model from {prev_model_fnm}")
            # try to reuse data and start training
            train_data_fnm = os.path.join(model_name, 'data.h5')
            if os.path.exists(train_data_fnm):
                train_X, train_Y, train_W = load_data_h5(train_data_fnm)
                print(f"Training data loaded from {train_data_fnm}, start training")
                model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2, shuffle=True, sample_weight=train_W)
                save_model(model, model_fnm)
                print("Model %s saved!" % model_name)
            else:
                # delete this folder and start again
                print(f"Deleting folder {model_name}")
                shutil.rmtree(model_name)
                last_i_train -= 1

    for i_train in range(last_i_train+1, args.n_train):
        model_name = "trained_model_%03d" % i_train
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt','w') as game_output:
            replay_last_game = False
            for i_game in range(args.train_step):
                if replay_last_game:
                    print("Repeating the starting board of last game")
                playone(i_game, game_output, winner_board, replay=replay_last_game)
                replay_last_game = any(player.strategy.surprised for player in game.players)
        print("Name    |   Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d"%(name, nwin))
        # collect training data
        train_X, train_Y, train_W = prepare_train_data(p1.strategy.learndata, p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2, shuffle=True, sample_weight=train_W)
        save_model(model, 'tf_model.h5')
        print("Model %s saved!" % model_name)

        os.chdir('..')
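When a model folder exists without its tf_model.h5, the resume logic above reloads the cached data.h5 through load_data_h5 and retrains. That helper is not shown in the source; the following is a minimal hypothetical sketch assuming a plain HDF5 layout, and the dataset names 'X', 'Y', and 'W' are invented for illustration:

import h5py
import numpy as np

def load_data_h5(fnm):
    # Read the cached training arrays back from HDF5 (assumed layout).
    with h5py.File(fnm, 'r') as f:
        train_X = np.array(f['X'])  # board feature arrays
        train_Y = np.array(f['Y'])  # target values
        train_W = np.array(f['W'])  # per-sample weights
    return train_X, train_Y, train_W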
Code Example #2
File: gomoku_train_swap.py  Project: yudongqiu/gomoku
def main():
    import argparse
    parser = argparse.ArgumentParser(description="Play the Gomoku Game!", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10, help='Number of training iterations to run.')
    parser.add_argument('-t', '--train_step', type=int, default=100, help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100, help="Number of epochs for each training model")
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p', '--begin_lib_p', type=float, default=1.0, help='Probability that the begin board library is used')
    parser.add_argument('-b', '--benchmark', action='store_true', default=False, help='Benchmark against the previous model after each training iteration')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    # load model
    from tf_model import get_new_model, load_existing_model, save_model
    model = get_new_model()
    # search for existing trained model
    last_i_train = -1
    for i_train in range(args.n_train):
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        if os.path.exists(model_name):
            print(f"Folder {model_name} exists")
            last_i_train = i_train
        else:
            break
    if last_i_train >= 0:
        # try to load the last trained model
        model_name = "trained_model_%03d" % last_i_train
        model_fnm = os.path.join(model_name, 'tf_model.h5')
        if os.path.exists(model_fnm):
            model = load_existing_model(model_fnm)
            print(f"Loaded trained model from {model_fnm}")
        else:
            # if the last trained model doesn't exist, load the previous one
            if last_i_train > 0:
                prev_model_name = f"trained_model_{last_i_train-1:03d}"
                prev_model_fnm = os.path.join(prev_model_name, 'tf_model.h5')
                model = load_existing_model(prev_model_fnm)
                print(f"Loaded latest model from {prev_model_fnm}")
            # try to reuse data and start training
            train_data_fnm = os.path.join(model_name, 'data.h5')
            if os.path.exists(train_data_fnm):
                train_X, train_Y, train_W = load_data_h5(train_data_fnm)
                print(f"Training data loaded from {train_data_fnm}, start training")
                model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2)
                save_model(model, model_fnm)
                print("Model %s saved!" % model_name)
                # refresh the training by loading it back
                # model = load_existing_model(model_fnm)
            else:
                # delete this folder and start again
                print(f"Deleting folder {model_name}")
                shutil.rmtree(model_name)
                last_i_train -= 1


    from AIPlayer import AIPlayer
    player_A = AIPlayer('Black', model)
    player_B = AIPlayer('White', model)
    # set up linked learndata and cache (allow AI to look into opponent's data)
    player_A.opponent = player_B
    player_B.opponent = player_A

    game.players = [player_A, player_B]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    allstones = {(r, c) for r in range(1, 16) for c in range(1, 16)}
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board, replay=False):
        game.reset()
        player_A.reset()
        player_B.reset()
        if replay:
            game.board = copy.deepcopy(game.last_begin_board)
        else:
            if random.random() < args.begin_lib_p:
                game.board = gen_begin_board(allstones, begin_lib)
            else:
                game.board = gen_begin_board(allstones, None)
            # store the begin board
            game.last_begin_board = copy.deepcopy(game.board)
        # assign an arbitrary black stone as the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n'%(i+1, winner))
        game_output.flush()

    print("Training the model for %d iterations."%args.n_train)

    for i_train in range(last_i_train+1, args.n_train):
        model_name = "trained_model_%03d" % i_train
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = {p.name: 0 for p in game.players}
        winner_board['Draw'] = 0
        with open('game_results.txt','w') as game_output:
            replay_last_game = False
            i_game = 0
            repeating_n = 0
            repeat_n_after_surprise = 0
            while True:
                playone(i_game, game_output, winner_board, replay=replay_last_game)
                surprised = any(player.surprised for player in game.players)
                if surprised:
                    replay_last_game = True
                    repeat_n_after_surprise = 0
                elif repeat_n_after_surprise < 5:
                    # keep replaying for at least 5 games after a surprise
                    replay_last_game = True
                else:
                    replay_last_game = False
                if replay_last_game:
                    repeating_n += 1
                    repeat_n_after_surprise += 1
                    print(f"Game {i_game} repeating {repeating_n} | {repeat_n_after_surprise}: {game.last_begin_board}")
                else:
                    repeating_n = 0
                    repeat_n_after_surprise = 0
                    i_game += 1
                    if i_game >= args.train_step:
                        break
                    print(f"New game {i_game}: {game.last_begin_board}")
                # prevent memory overflow and getting killed
                if len(player_A.learndata) > 3000000:
                    print('Learn data is full, stopping')
                    break
        print("Name    |   Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d"%(name, nwin))
        # reset player cache
        player_A.reset_cache()
        player_B.reset_cache()
        # collect training data
        train_X, train_Y, train_W = prepare_train_data(player_A.learndata, player_B.learndata)
        # fit the model
        model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2)
        save_model(model, 'tf_model.h5')
        print("Model %s saved!" % model_name)
        # refresh the training by loading it back
        #model = load_existing_model('tf_model.h5')
        #player_A.model = player_B.model = model
        os.chdir('..')
        if args.benchmark and i_train > 0:
            prev_model_name = f"trained_model_{i_train-1:03d}"
            prev_model_fnm = os.path.join(prev_model_name, 'tf_model.h5')
            prev_model = load_existing_model(prev_model_fnm)
            os.chdir(model_name)
            with open('benchmark.txt','w') as game_output:
                # play 100 games as Black
                print("New model as Black", file=game_output)
                player_A.model, player_B.model = model, prev_model
                winner_board_b = {p.name: 0 for p in game.players}
                winner_board_b['Draw'] = 0
                for i_game in range(100):
                    playone(i_game, game_output, winner_board_b)
                print("Name    |   Games Won", file=game_output)
                for name, nwin in winner_board_b.items():
                    print("%-7s | %7d"%(name, nwin), file=game_output)
                # play 100 games as White
                print("New model as White", file=game_output)
                player_A.model, player_B.model = prev_model, model
                winner_board_w = {p.name: 0 for p in game.players}
                winner_board_w['Draw'] = 0
                for i_game in range(100):
                    playone(i_game, game_output, winner_board_w)
                print("Name    |   Games Won", file=game_output)
                for name, nwin in winner_board_w.items():
                    print("%-7s | %7d"%(name, nwin), file=game_output)
                print('\n\n'+'-' * 50, file=game_output)
                print('           |   Win           Lose           Draw', file=game_output)
                print('-' * 50, file=game_output)
                print(f' as Black  | {winner_board_b[player_A.name]:10} {winner_board_b[player_B.name]:10} {winner_board_b["Draw"]:10}', file=game_output)
                print(f' as White  | {winner_board_w[player_B.name]:10} {winner_board_w[player_A.name]:10} {winner_board_w["Draw"]:10}', file=game_output)
                print('-' * 50, file=game_output)
            os.chdir('..')
            # refresh the training by loading it back
            model_fnm = os.path.join(model_name, 'tf_model.h5')
            model = load_existing_model(model_fnm)
            player_A.model = player_B.model = model
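Both examples import get_new_model, load_existing_model, and save_model from tf_model, which the source does not show. A minimal sketch, assuming a standard Keras value network; the layer stack and the (15, 15, 3) input shape are illustrative guesses, not the project's actual architecture:

import tensorflow as tf

def get_new_model():
    # Illustrative architecture only -- the real network lives in tf_model.py.
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu',
                               input_shape=(15, 15, 3)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1, activation='tanh'),  # value head in [-1, 1]
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

def load_existing_model(fnm):
    return tf.keras.models.load_model(fnm)

def save_model(model, fnm):
    model.save(fnm)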
Code Example #3
File: gomoku_train_swap.py  Project: yudongqiu/gomoku
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description="Play the Gomoku Game!",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n',
                        '--n_train',
                        type=int,
                        default=10,
                        help='Number of training iterations to run.')
    parser.add_argument('-t',
                        '--train_step',
                        type=int,
                        default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e',
                        '--n_epoch',
                        type=int,
                        default=100,
                        help="Number of epochs for each training model")
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p',
                        '--begin_lib_p',
                        type=float,
                        default=1.0,
                        help='Probability that the begin board library is used')
    parser.add_argument('--new',
                        action='store_true',
                        help='Start from new model')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    from tf_model import get_new_model, load_existing_model, save_model
    if args.new:
        model = get_new_model()
    else:
        model = load_existing_model('initial_model/tf_model.h5')

    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow AI to look into opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()

    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    allstones = {(r, c) for r in range(1, 16) for c in range(1, 16)}
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board):
        game.reset()
        player_A.reset()
        player_B.reset()
        if random.random() < args.begin_lib_p:
            game.board = gen_begin_board(allstones, begin_lib)
        else:
            game.board = gen_begin_board(allstones, None)
        # assign an arbitrary black stone as the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)

    for i_train in range(args.n_train):
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        assert not os.path.exists(
            model_name), "Current model %s already exists!" % model_name
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict([(p.name, 0)
                                                for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt', 'w') as game_output:
            for i_game in range(args.train_step):
                playone(i_game, game_output, winner_board)
        print("Name    |   Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # collect training data
        train_X, train_Y = prepare_train_data(p1.strategy.learndata,
                                              p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        model.fit(train_X,
                  train_Y,
                  epochs=args.n_epoch,
                  validation_split=0.2,
                  shuffle=True)
        save_model(model, 'tf_model.h5')
        print("Model %s saved!" % model_name)

        os.chdir('..')
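Per the file headers above, these excerpts come from gomoku_train_swap.py, so a run is launched from the command line. An illustrative invocation (the flag values are arbitrary, and my_begin_lib stands for a hypothetical Python module defining a begin_lib list; the script imports it by module name via __import__, so no .py extension):

python gomoku_train_swap.py --new -n 20 -t 200 -e 50 -l my_begin_lib -p 0.5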