Example #1
def main():
    import argparse
    import collections
    import pickle
    parser = argparse.ArgumentParser(
        description="Play the Gomoku Game!",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-g',
                        '--n_games',
                        type=int,
                        default=100,
                        help='Number of games to play.')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    import tarfile
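    # unpack input.tar.gz, which provides the saved model and learndata files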
    with tarfile.open('input.tar.gz') as tar:
        tar.extractall()
    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load('tf_model/tf_model')

    import player_A, player_B
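    # both AI modules share the same TF model for tf_predict_u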
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy

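    # resume each player's accumulated learndata from previous runs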
    p1.strategy.learndata = pickle.load(open('black.learndata', 'rb'))
    p2.strategy.learndata = pickle.load(open('white.learndata', 'rb'))

    game.players = [p1, p2]
    if args.n_games > 1:
        game.fastmode = 2

    def playone(i, game_output, winner_board):
        game.reset()
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
    winner_board["Draw"] = 0
    with open('game_results.txt', 'w') as game_output:
        for i_game in range(args.n_games):
            playone(i_game, game_output, winner_board)
    print("Name    |   Games Won")
    for name, nwin in winner_board.items():
        print("%-7s | %7d" % (name, nwin))
    # save the learndata to disk
    pickle.dump(p1.strategy.learndata, open('newblack.learndata', 'wb'))
    pickle.dump(p2.strategy.learndata, open('newwhite.learndata', 'wb'))
    print('%d black learndata and %d white learndata saved!' %
          (len(p1.strategy.learndata), len(p2.strategy.learndata)))
    with tarfile.open('output.tar.gz', 'w:gz') as tar:
        tar.add('newblack.learndata')
        tar.add('newwhite.learndata')
Example #2
def main():
    import argparse
    import collections
    parser = argparse.ArgumentParser(description="Play the Gomoku Game!", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_game', type=int, default=10, help='Play a number of games to gather statistics.')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load('bench_model/tf_model')
    #model.load('trained_model_001/trained_model_001')
    #model.load('trained_model_002/trained_model_002')
    #model.load('trained_model_003/trained_model_003')
    #model.load('trained_model_007/trained_model_007')

    import player_A
    player_A.tf_predict_u.model = model
    player_A.initialize()
    p1 = Player('TF')
    p1.strategy = player_A.strategy
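    # the TF player uses a cheap estimate level and more move randomness than the benchmark AI below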
    player_A.estimate_level = 1
    player_A.t_random = 0.01

    import ai_bench
    ai_bench.initialize()
    p2 = Player('BenchAI')
    p2.strategy = ai_bench.strategy
    ai_bench.estimate_level = 4
    ai_bench.t_random = 0.001


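    # seat the benchmark AI first so it makes the first move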
    game.players = [p2, p1]
    if args.n_game > 1:
        game.fastmode = 2
    else:
        player_A.show_q = ai_bench.show_q = True

    def playone(i, game_output, winner_board):
        game.reset()
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n'%(i+1, winner))
        game_output.flush()

    winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
    winner_board["Draw"] = 0
    with open('game_results.txt','w') as game_output:
        for i_game in range(args.n_game):
            playone(i_game, game_output, winner_board)
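            # discard learndata between games; this benchmark only collects win statistics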
            p1.strategy.learndata = dict()
    print("Name    |   Games Won")
    for name, nwin in winner_board.items():
        print("%-7s | %7d"%(name, nwin))
Example #3
def main():
    import argparse
    import collections
    import os
    parser = argparse.ArgumentParser(
        description="Play the Gomoku Game!",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n',
                        '--n_train',
                        type=int,
                        default=10,
                        help='Play a number of games to gather statistics.')
    parser.add_argument('-t',
                        '--train_step',
                        type=int,
                        default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e',
                        '--n_epoch',
                        type=int,
                        default=100,
                        help="Number of epochs for each training model")
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load('initial_model/tf_model')

    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow AI to look into opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()

    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    def playone(i, game_output, winner_board):
        game.reset()
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)

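    # each iteration: play train_step games, collect learndata, retrain the model, save it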
    for i_train in range(args.n_train):
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        assert not os.path.exists(
            model_name), "Current model %s already exists!" % model_name
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict([(p.name, 0)
                                                for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt', 'w') as game_output:
            for i_game in range(args.train_step):
                playone(i_game, game_output, winner_board)
        print("Name    |   Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # collect training data
        train_X, train_Y = prepare_train_data(p1.strategy.learndata,
                                              p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        model.fit(train_X,
                  train_Y,
                  n_epoch=args.n_epoch,
                  validation_set=0.1,
                  shuffle=True,
                  show_metric=True)
        model.save('tf_model')
        print("Model %s saved!" % model_name)

        os.chdir('..')
Example #4
def main():
    import argparse
    import collections
    import os
    import random
    parser = argparse.ArgumentParser(
        description="Play the Gomoku Game!",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n',
                        '--n_train',
                        type=int,
                        default=10,
                        help='Play a number of games to gather statistics.')
    parser.add_argument('-t',
                        '--train_step',
                        type=int,
                        default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e',
                        '--n_epoch',
                        type=int,
                        default=100,
                        help="Number of epochs for each training model")
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p',
                        '--begin_lib_p',
                        type=float,
                        default=1.0,
                        help='Probability of using the begin lib')
    parser.add_argument('--new',
                        action='store_true',
                        help='Start from new model')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    import construct_dnn
    model = construct_dnn.construct_dnn()
    if not args.new:
        model.load('initial_model/tf_model')

    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow AI to look into opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()

    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

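    # all 15x15 board coordinates (1-indexed), used to generate random opening boards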
    allstones = {(r, c) for r in range(1, 16) for c in range(1, 16)}
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board):
        game.reset()
        player_A.reset()
        player_B.reset()
        if random.random() < args.begin_lib_p:
            game.board = gen_begin_board(allstones, begin_lib)
        else:
            game.board = gen_begin_board(allstones, None)
        # randomly assign a black stone to be the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)

    for i_train in range(args.n_train):
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        assert not os.path.exists(
            model_name), "Current model %s already exists!" % model_name
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict([(p.name, 0)
                                                for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt', 'w') as game_output:
            for i_game in range(args.train_step):
                playone(i_game, game_output, winner_board)
        print("Name    |   Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # collect training data
        train_X, train_Y = prepare_train_data(p1.strategy.learndata,
                                              p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        #model.trainer.training_state.step = 0 # reset the training step so lr_decay is reset
        model.fit(train_X,
                  train_Y,
                  n_epoch=args.n_epoch,
                  validation_set=0.1,
                  shuffle=True,
                  show_metric=True)
        model.save('tf_model')
        print("Model %s saved!" % model_name)

        os.chdir('..')
Example #5
def main():
    import argparse
    import collections
    import copy
    import os
    import random
    import shutil
    parser = argparse.ArgumentParser(description="Play the Gomoku Game!", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10, help='Play a number of games to gather statistics.')
    parser.add_argument('-t', '--train_step', type=int, default=100, help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100, help="Number of epochs for each training model")
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p', '--begin_lib_p', type=float, default=1.0, help='Probability of using the begin lib')
    parser.add_argument('--new', action='store_true', help='Start from new model')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    from tf_model import get_new_model, load_existing_model, save_model
    if args.new:
        model = get_new_model()
    else:
        model = load_existing_model('initial_model/tf_model.h5')

    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()
    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow AI to look into opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()


    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    allstones = {(r, c) for r in range(1, 16) for c in range(1, 16)}
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board, replay=False):
        game.reset()
        player_A.reset()
        player_B.reset()
        if replay:
            game.board = copy.deepcopy(game.last_begin_board)
        else:
            if random.random() < args.begin_lib_p:
                game.board = gen_begin_board(allstones, begin_lib)
            else:
                game.board = gen_begin_board(allstones, None)
            # store the begin board
            game.last_begin_board = copy.deepcopy(game.board)
        # randomly assign a black stone to be the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n'%(i+1, winner))
        game_output.flush()

    print("Training the model for %d iterations."%args.n_train)

    last_i_train = -1
    for i_train in range(args.n_train):
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        if os.path.exists(model_name):
            print(f"Folder {model_name} exists")
            last_i_train = i_train
        else:
            break
    if last_i_train >= 0:
        # try to load the last trained model
        model_name = "trained_model_%03d" % last_i_train
        model_fnm = os.path.join(model_name, 'tf_model.h5')
        if os.path.exists(model_fnm):
            model = load_existing_model(model_fnm)
            print(f"Loaded trained model from {model_fnm}")
        else:
            # if the last trained model does not exist, load the previous one
            if last_i_train > 0:
                prev_model_name = f"trained_model_{last_i_train-1:03d}"
                prev_model_fnm = os.path.join(prev_model_name, 'tf_model.h5')
                model = load_existing_model(prev_model_fnm)
                print(f"Loaded latest model from {prev_model_fnm}")
            # try to reuse data and start training
            train_data_fnm = os.path.join(model_name, 'data.h5')
            if os.path.exists(train_data_fnm):
                train_X, train_Y, train_W = load_data_h5(train_data_fnm)
                print(f"Training data loaded from {train_data_fnm}, start training")
                model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2, shuffle=True, sample_weight=train_W)
                save_model(model, model_fnm)
                print("Model %s saved!" % model_name)
            else:
                # delete this incomplete folder and redo this iteration
                print(f"Deleting folder {model_name}")
                shutil.rmtree(model_name)
                last_i_train -= 1

    for i_train in range(last_i_train+1, args.n_train):
        model_name = "trained_model_%03d" % i_train
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt','w') as game_output:
            replay_last_game = False
            for i_game in range(args.train_step):
                if replay_last_game:
                    print("Repeating the starting board of last game")
                playone(i_game, game_output, winner_board, replay=replay_last_game)
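                # if either strategy was "surprised" this game, replay the same opening board next game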
                replay_last_game = any(player.strategy.surprised for player in game.players)
        print("Name    |   Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d"%(name, nwin))
        # collect training data
        train_X, train_Y, train_W = prepare_train_data(p1.strategy.learndata, p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2, shuffle=True, sample_weight=train_W)
        save_model(model, 'tf_model.h5')
        print("Model %s saved!" % model_name)

        os.chdir('..')
Example #6
import numpy as np

def read_state(fnm):
    # header reconstructed: the original snippet begins mid-function
    state = []
    with open(fnm) as f:
        for line in f:
            s = line[4:].strip()     # strip the row-label prefix
            s = s.replace('-', '0')  # '-' empty -> 0
            s = s.replace('x', '1')  # 'x' black -> 1
            s = s.replace('o', '-1') # 'o' white -> -1
            state.append(s.split())
    print(state)
    return np.array(state, dtype=np.int8)


import player_A

state = read_state('state_debug.txt')
player_A.print_state(state)

last_move = (4, 11)

import construct_dnn
model = construct_dnn.construct_dnn()

player_A.initialize()
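# exercise the strategy once on a tiny dummy state (one black stone at (1, 1))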
player_A.strategy((({(1, 1)}, {}), (1, 1), 1, 15))
player_A.tf_predict_u.model = model
model.load('../../auto_playok_com/tf_model')

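# query the AI's best action (and its Q value) for the loaded position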
player_A.best_action_q(state, 12342, 180, last_move, -1, 1, 1, -1)

import IPython
IPython.embed()

#player_A.find_interesting_moves(state, 120, np.zeros((15,15)), 1, 50, True)