def player_factory(name):
    """Return a new player object of the given type.

    Parameters:
        name: one of the keys below, e.g. "RANDOM", "MINIMAX3".

    Returns:
        A freshly constructed player instance.

    Raises:
        KeyError: if *name* is not a known player type.
    """
    # Map each name to a zero-argument constructor so only the requested
    # player is instantiated.  The original built an instance of EVERY
    # player type on each call just to return one of them, which is
    # wasteful if any constructor does non-trivial setup.
    factories = {
        "RANDOM": RandomPlayer,
        "GREEDY": GreedyPlayer,
        "MINIMAX": MinimaxPlayer,
        "EXPECTIMAX": ExpectimaxPlayer,
        "IDS": IterativeDeepeningPlayer,
        "ALPHABETA": AlphaBetaPlayer,
        "HEURISTIC": HeuristicPlayer,
        "MINIMAX3": lambda: MinimaxPlayer(3),
        "MINIMAX1": lambda: MinimaxPlayer(1),
    }
    if name not in factories:
        # Keep KeyError (what the original dict lookup raised) so existing
        # callers that catch it still work, but with a clearer message.
        raise KeyError("unknown player type: {!r}".format(name))
    return factories[name]()
def test_net_minimax(net, n_games, depth, showbar=False):
    """Play *n_games* against a minimax adversary and return the fraction won."""
    challenger = NetPlayer(net)
    adversary = MinimaxPlayer(depth)
    results = play_n_games(n_games, challenger, adversary,
                           randomize=True, showbar=showbar)
    # Element 1 of the results tuple is the challenger's win count.
    wins = results[1]
    return wins / n_games
def main(_):
    """Train the Q-learning Omok agent against a depth-1 minimax opponent.

    Restores the model from ./savedata when a checkpoint exists, then plays
    games indefinitely, saving a checkpoint and printing the win rate over
    the last 10 games every 10 iterations.  Runs until interrupted.
    """
    # Create the replay-memory instance for experience replay.
    memory = ReplayMemory(gridSize, maxMemory, discount)
    checkpoint_path = os.path.join(os.getcwd(), "savedata", "OmokModel.ckpt")

    # Use a context manager so the session is always closed; the original
    # called sess.close() after an infinite loop, so it could never run.
    with tf.Session() as sess:
        # Initialize TensorFlow variables and set up checkpointing.
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        # Load a previously saved model, if one exists.
        if os.path.isfile(checkpoint_path + ".index"):
            saver.restore(sess, checkpoint_path)
            print('saved model is loaded!')
        else:
            print("Training new model")

        iteration = 0
        player1_wincount = 0
        player2_wincount = 0

        # Game-play loop: train forever (stopped externally).
        while True:
            player1 = QLearningPlayer(10)
            player2 = MinimaxPlayer(1)
            result = qlearning_with_minimax(
                player1, player2, memory, sess, saver, epsilon, iteration)

            # Tally the winner (1 = Q-learning player, 2 = minimax player).
            if result == 1:
                player1_wincount += 1
            elif result == 2:
                player2_wincount += 1
            else:
                print("WIN PLAYER ERROR!!!")

            iteration += 1
            if iteration % 10 == 0:
                # Win rate over the last 10 games; guard against division
                # by zero in case every game hit the error branch above.
                total = player1_wincount + player2_wincount
                win_rate = player1_wincount / total * 100 if total else 0.0
                save_path = saver.save(sess, checkpoint_path)
                print("Iteration : " + str(iteration)
                      + ", Win Rate : " + str(win_rate))
                print("Saved On " + save_path)
                player1_wincount = 0
                player2_wincount = 0
# NOTE(review): truncated fragment — the enclosing scope and the body of the
# final `if` lie outside this view; indentation is a best-effort
# reconstruction of the flattened original.
# Self-play tuning loop: our weighted minimax agent plays full poker games
# against a randomly-initialised minimax opponent each iteration.
while number_of_iterations < MAX_NUMBER_OF_ITERATIONS:
    if not replay:
        # randomly initialize opponent weights if not replaying
        opponent_weights = []
        for x in range(NUMBER_OF_WEIGHTS):
            opponent_weights.append(random.random())
    # max_round: number of times players can play against each. set to be
    # the same as number of rounds in actual assessment
    # initial_stack: starting money. set to be 1000 as per assessment
    # small_blind_amount: set to be 10 as per assessment
    config = setup_config(max_round=1000, initial_stack=1000,
                          small_blind_amount=10)
    config.register_player(name="my_agent",
                           algorithm=MinimaxPlayer(agent_weights))
    config.register_player(name="randomly_initialized_agent",
                           algorithm=MinimaxPlayer(opponent_weights))
    print("agent: ", agent_weights)
    print("opponent: ", opponent_weights)
    game_result = start_poker(config, verbose=1)
    # End-of-game chip stacks decide this iteration's winner; player order
    # presumably matches registration order — TODO confirm against
    # pypokerengine's result structure.
    my_agent_end_stack = game_result["players"][0]["stack"]
    opponent_end_stack = game_result["players"][1]["stack"]
    print("agent stack: ", my_agent_end_stack)
    print("opponent stack: ", opponent_end_stack)
    # NOTE(review): fragment ends here — the winner-handling branch body is
    # outside this view.
    if my_agent_end_stack > opponent_end_stack:
from board import Board
from game import Game, Result
from random_player import RandomPlayer
from maxround_player import MaxroundPlayer
from edge_player import EdgePlayer
from minimax_player import MinimaxPlayer

# NOTE(review): `sys` is used below but not imported in this fragment —
# presumably imported elsewhere in the file; verify.
if __name__ == '__main__':
    # Number of games to play; may be overridden by the first CLI argument.
    n_games = 1000
    if len(sys.argv) > 1:
        n_games = int(sys.argv[1])
        if n_games < 0:
            raise ValueError('cannot run negative number of games')
    # Fixed match-up: minimax plays black, max-round heuristic plays white.
    player_black = MinimaxPlayer('Max Minimax', Board.FIELD_BLACK)
    player_white = MaxroundPlayer('Max Round', Board.FIELD_WHITE)
    # Aggregate win counts and score differentials over all games.
    black_wins, white_wins, ties = 0, 0, 0
    black_diff, white_diff = 0, 0
    for i in range(n_games):
        game = Game(player_black, player_white)
        result, diff = game.play()
        if result == Result.BLACK_WINS:
            black_wins += 1
            black_diff += diff
        elif result == Result.WHITE_WINS:
            white_wins += 1
            white_diff += diff
        else:
            # NOTE(review): fragment truncated here — the tie branch and any
            # summary reporting continue outside this view.
# Tuning loop: the weighted minimax agent plays against a random opponent
# each iteration.
number_of_iterations = 0
while number_of_iterations < MAX_NUMBER_OF_ITERATIONS:
    if not replay:
        # randomly initialize opponent weights if not replaying
        # NOTE(review): despite the comment above, this randomizes the
        # *agent's* weights, not an opponent's — confirm intent.
        agent_weights = []
        for x in range(NUMBER_OF_WEIGHTS):
            agent_weights.append(random.random())
    # max_round: number of times players can play against each. set to be
    # the same as number of rounds in actual assessment
    # initial_stack: starting money. set to be 1000 as per assessment
    # small_blind_amount: set to be 10 as per assessment
    # NOTE(review): the comments above disagree with the actual values
    # passed below (max_round=500, initial_stack=10000,
    # small_blind_amount=20) — confirm which is intended.
    config = setup_config(
        max_round=500, initial_stack=10000, small_blind_amount=20)
    config.register_player(
        name="my_agent", algorithm=MinimaxPlayer(agent_weights))
    config.register_player(name="opponent_agent", algorithm=RandomPlayer())
    print("agent: ", agent_weights)
    game_result = start_poker(config, verbose=1)
    # Final chip stacks decide this iteration's winner; player order
    # presumably matches registration order — TODO confirm.
    my_agent_end_stack = game_result["players"][0]["stack"]
    opponent_end_stack = game_result["players"][1]["stack"]
    print("agent stack: ", my_agent_end_stack)
    print("opponent stack: ", opponent_end_stack)
    if my_agent_end_stack > opponent_end_stack:
        # agent won so weights remains unchanged
        # NOTE(review): fragment truncated here — the rest of this branch is
        # outside this view.
from pypokerengine.api.game import setup_config, start_poker
from randomplayer import RandomPlayer
from raise_player import RaisedPlayer
from minimax_player import MinimaxPlayer

# TODO:config the config as our wish
# Ten-hand heads-up match: the weighted minimax agent ("f1") against the
# always-raise baseline ("FT2").
minimax_weights = [0.5, 0.5, 0.5, 0.5, 1, 0.5]

config = setup_config(max_round=10, initial_stack=10000,
                      small_blind_amount=20)
config.register_player(name="f1", algorithm=MinimaxPlayer(minimax_weights))
config.register_player(name="FT2", algorithm=RaisedPlayer())

game_result = start_poker(config, verbose=0)
from minimax_player import MinimaxPlayer

# Total number of self-play episodes to run.
EPISODES = 1000000

# [START] pg (policy gradient) vs minimax
if __name__ == "__main__":
    # Configuration parameters.
    MODEL_LOAD = True
    PRINT_FLAG = False
    BLACK = 1
    WHITE = 2
    DEPTH = 1  # search depth passed to the minimax opponent

    # Create the environment and the two agents.
    # NOTE(review): `Env`, `PgAgent`, and `np` are used here but not
    # imported in this fragment — presumably imported elsewhere in the file.
    env = Env()
    player1 = PgAgent()
    player2 = MinimaxPlayer(DEPTH)

    global_step = 0
    scores, episodes = [], []

    for e in range(EPISODES):
        done = False
        score = 0
        # Reset the board and flatten it into the agent's input shape.
        state = env.reset()
        state = np.reshape(state, [1, player1.state_size])

        while not done:
            # env initialization
            global_step += 1
            # Odd turn (q-learning player) - Black
            # NOTE(review): fragment truncated here — the turn-handling
            # logic continues outside this view.
# NOTE(review): this fragment begins mid-statement — the opening of the
# logging call below and the enclosing loop/method are outside this view;
# indentation is a best-effort reconstruction of the flattened original.
                    player.name, move[0], move[1]))
                stuck_counter = 0
            else:
                # Reject and log the illegal move attempt.
                logging.warning('player {:s} illegal move ({:d}, {:d})'.format(
                    player.name, move[0], move[1]))
            # If both players are stuck in a row, abandon the game.
            if stuck_counter == 2:
                logging.warning('game is stuck')
                break

        # Final board outcome: field counts decide the winner.
        done, black, white = board.outcome()
        logging.debug('{:s}: {:d}, {:s}: {:d}, finished: {}'.format(
            self._player_black.name, black,
            self._player_white.name, white, done))
        logging.debug('field \n{}'.format(board.fields))
        # Return (result, score differential); differential is 0 on a tie.
        if black > white:
            return (Result.BLACK_WINS, black - white)
        elif white > black:
            return (Result.WHITE_WINS, white - black)
        else:
            return (Result.TIE, 0)


# Quick manual game: human input (stdin) as black vs minimax as white.
if __name__ == '__main__':
    player_black = StdinPlayer('blacky', Board.FIELD_BLACK)
    player_white = MinimaxPlayer('whitey', Board.FIELD_WHITE)
    game = Game(player_black, player_white)
    result, diff = game.play()
    print(result, diff)
from pypokerengine.api.game import setup_config, start_poker
from randomplayer import RandomPlayer
from raise_player import RaisedPlayer
from minimax_player import MinimaxPlayer

# TODO:config the config as our wish
# Ten-hand heads-up match between the weighted minimax agent ("f1") and the
# always-raise baseline ("FT2").
agent_weights = [1, 1, 2, 1.5, 20]

config = setup_config(max_round=10, initial_stack=10000,
                      small_blind_amount=10)
config.register_player(name="f1", algorithm=MinimaxPlayer(agent_weights))
config.register_player(name="FT2", algorithm=RaisedPlayer())

game_result = start_poker(config, verbose=0)