def board_config_test(model_cls):
    """
    Test a model on the pre-defined board configurations and return its
    success rate.

    model_cls : Neural Network model
    """
    game = Connect4Env(None)
    success = 0
    model = model_cls
    for elem in BOARD_CONFIGS:
        board = BOARD_CONFIGS[elem][0]
        act_to_do = BOARD_CONFIGS[elem][1]
        game.reset()
        game.board = board
        input_dict = {"obs": {}}
        # if model.use_conv:
        #     reshaped_board = np.expand_dims(board, axis=(0, -1))
        # else:
        #     reshaped_board = np.reshape(board, (1, board.shape[0] * board.shape[1]))
        action_mask = game.get_moves(True)
        input_dict["obs"]["state"] = board
        input_dict["obs"]["action_mask"] = action_mask
        action_logits, _ = model.forward(input_dict, None, None)
        act = np.argmax(action_logits)
        if act in act_to_do:
            success += 1
    num_of_configs = len(BOARD_CONFIGS)
    success_rate = success / num_of_configs
    return success_rate, success, num_of_configs
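# A minimal usage sketch (an assumption, not part of the original file): wrap
# board_config_test and print a readable summary of the result. The model is
# only assumed to expose forward(input_dict, state, seq_lens), as the
# evaluation code above expects.
def example_board_config_check(model):
    rate, hits, total = board_config_test(model)
    print("Solved " + str(hits) + "/" + str(total) +
          " configurations (" + str(round(rate * 100, 1)) + "%)")
    return rate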
def player_vs_AI(screen, model, lstm_model, weights, close_window=True):
    global agent
    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    board = game.board
    board_to_print = np.transpose(board)
    draw_board(board_to_print, screen)
    myfont = pygame.font.SysFont("monospace", 55)

    full_game = []
    timestep = 0
    game_over = False
    actions = {}

    print("printing agent = " + str(agent))
    if agent is not None:
        print("Agent chosen using the model")
        w2_indx = lvl_to_indx(int(agent))
        w2_key = list(weights.keys())[w2_indx]
        w2 = weights[w2_key]
        lvl = indx_to_lvl(w2_indx)
        # lvl = weights.keys().index(w2)
    else:
        print("Agent chosen randomly...")
        w2_indx = np.random.choice(range(len(weights)))
        w2_key = list(weights.keys())[w2_indx]
        w2 = weights[w2_key]
        lvl = indx_to_lvl(w2_indx)

    if randomize:
        starting_player = random.choice([player1_ID, player2_ID])
    else:
        starting_player = player1_ID
    game.reset(starting_player=starting_player, randomize=False)

    board_plus_action_total = []
    done = {}
    done["__all__"] = False
    print("You are now playing against an agent of level " + str(lvl))

    while not game_over:
        timestep += 1
        actual_player = game.current_player
        board = game.board
        board_p2 = game.board_p2

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

            if event.type == pygame.MOUSEMOTION:
                pygame.draw.rect(screen, BLACK, (0, 0, width, SQUARESIZE))
                posx = event.pos[0]
                if actual_player == player1_ID:
                    pygame.draw.circle(screen, RED, (posx, int(SQUARESIZE / 2)), RADIUS)
                pygame.display.update()

            if event.type == pygame.MOUSEBUTTONDOWN:
                pygame.draw.rect(screen, BLACK, (0, 0, width, SQUARESIZE))
                # print(event.pos)

                # Ask for Player 1 (human) input
                if actual_player == player1_ID:
                    action_mask = game.get_moves(False)
                    posx = event.pos[0]
                    act = int(math.floor(posx / SQUARESIZE))
                    print("player 1 move: " + str(act))
                    if act in action_mask:
                        flattened_board = np.ndarray.flatten(board)
                        board_plus_actions = np.append(flattened_board, float(act))
                        board_plus_action_total.append([board_plus_actions])
                        actions[player1] = act
                        _, rew, done, _ = game.step(actions)
                        # print_board(board)
                        board = game.board
                        board_to_print = np.transpose(board)
                        draw_board(board_to_print, screen)

                # Ask for Player 2 (agent) input
                if actual_player == player2_ID:
                    input_dict = {"obs": {}}
                    action_mask = game.get_moves(True)
                    input_dict["obs"]["state"] = board_p2
                    input_dict["obs"]["action_mask"] = action_mask
                    action_logits, _ = model.forward(input_dict, None, None)
                    if timestep > number_of_stochastic_moves:
                        act = np.argmax(action_logits[0])
                    else:
                        action_prob = [
                            np.exp(single_log) / sum(np.exp(action_logits[0]))
                            for single_log in action_logits[0]
                        ]
                        act = np.random.choice([0, 1, 2, 3, 4], 1, p=action_prob)[0]
                    actions[player2] = act
                    pygame.time.wait(1000)
                    _, rew, done, _ = game.step(actions)
                    print(game)
                    # game.render()
                    board = game.board
                    board_to_print = np.transpose(board)
                    draw_board(board_to_print, screen)

                if done["__all__"]:
                    print(rew)
                    if rew["player1"] == 1.0:
                        print("Player 1 won!!")
                        label = myfont.render("Player 1 won!!", 1, RED)
                        screen.blit(label, (40, 10))
                        pygame.display.update()
                        pygame.time.wait(1000)
                    elif rew["player1"] == -1.0:
                        print("Player 2 won!!")
                        label = myfont.render("Player 2 won!!", 1, YELLOW)
                        screen.blit(label, (40, 10))
                        pygame.display.update()
                        pygame.time.wait(1000)
                    elif rew["player1"] == 0.0:
                        print("Draw")
                        label = myfont.render("Draw!!", 1, WHITE)
                        screen.blit(label, (40, 10))
                        pygame.display.update()
                        pygame.time.wait(1000)

                    if len(board_plus_action_total) < sequence_len:
                        print("Game finished too early, restarting...")
                        timestep = 0
                        game.reset(randomize=True)
                        game_over = False
                        continue

                    game_over = True
                    # Build overlapping windows of length sequence_len for the LSTM
                    board_plus_action_and_outcome = board_plus_action_total
                    for j in range(len(board_plus_action_and_outcome) - (sequence_len - 1)):
                        full_game.append([])
                        full_game[-1].append(board_plus_action_and_outcome[j:j + sequence_len])

    if game_over:
        full_game = np.asarray(full_game)
        full_game = np.squeeze(full_game)
        if len(full_game.shape) == 2:
            full_game = np.expand_dims(full_game, axis=0)
        full_game = full_game.astype("float32")
        y = lstm_model(full_game, training=False)
        predicted_values = tf.math.reduce_mean(y, axis=0)
        predicted_indx = tf.math.argmax(predicted_values)
        agent = predicted_indx
        print("Model output probability: " + str(predicted_values.numpy()))
        pygame.time.wait(3000)

    if close_window:
        pygame.display.quit()
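# player_vs_AI above turns one finished game into overlapping windows of
# `sequence_len` consecutive (flattened board + action) vectors, which is the
# sequence shape the LSTM classifier expects. A standalone sketch of that
# windowing step (illustrative only; the function builds the windows inline
# with a slightly different nesting):
def make_windows(steps, sequence_len):
    """Return every sliding window of length `sequence_len` over `steps`."""
    return [steps[j:j + sequence_len]
            for j in range(len(steps) - (sequence_len - 1))]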
# Generate a fake input to define the model structure, then load the weights.
# LSTM input shape: [batch, timesteps, features]
random_input = np.random.rand(1, lstm_timesteps, features_len)
random_input = random_input.astype("float32")
lstm_model(random_input)
lstm_model.set_weights(lstm_weights[()])

randomize = True

player1_ID = Connect3Config.PLAYER1_ID
player2_ID = Connect3Config.PLAYER2_ID
player1 = Connect3Config.PLAYER1
player2 = Connect3Config.PLAYER2

game = Connect4Env(
    None,
    width=Connect3Config.WIDTH,
    height=Connect3Config.HEIGHT,
    n_actions=Connect3Config.N_ACTIONS,
    connect=Connect3Config.CONNECT,
)

# =============================================================================
# PYGAME
# =============================================================================
# pygame.init()
# # pygame.display.update()
# screen = pygame.display.set_mode(size)
# pygame.display.set_caption("Connect 3")
# player_vs_AI(screen, model, lstm_model)

app = tkinterApp(model, lstm_model, weights)
app.mainloop()
def model_vs_model_stochastic(model1, model2, number_of_games, discount_rate, randomize=True):
    """
    Play a number of games between two models. Instead of always picking the
    best action, each move is sampled according to the model's action
    probabilities.

    Parameters
    ----------
    model1 : Neural Network model
    model2 : Neural Network model
    number_of_games : int
        number of games to play
    discount_rate : float
        scales the final reward by discount_rate**timestep
    randomize : bool
        whether the first player to move is chosen at random

    Returns
    -------
    final_value : float
        average (discounted) final reward from player 2's point of view
    p1_win_rate : float
        fraction of games won by player 1
    """
    game = Connect4Env(None)
    final_value = 0
    p1_win = 0
    for i in range(number_of_games):
        # timestep tracks how long a game lasts
        timestep = 0
        reward = 0
        game_over = False
        actions = {}
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
        else:
            starting_player = player1_ID
        game.reset(starting_player=starting_player)
        while not game_over:
            timestep += 1
            actual_player = game.current_player
            board = game.board
            board_p2 = game.board_p2
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                # if model1.use_conv:
                #     reshaped_board = np.expand_dims(board, axis=(0, -1))
                # else:
                #     reshaped_board = np.reshape(board, (1, board.shape[0] * board.shape[1]))
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model1.forward(input_dict, None, None)
                action_prob = [
                    np.exp(single_log) / sum(np.exp(action_logits[0]))
                    for single_log in action_logits[0]
                ]
                act = np.random.choice([0, 1, 2, 3, 4, 5, 6], 1, p=action_prob)[0]
                # act = np.argmax(action_logits[0])
                actions[player1] = act
                _, rew, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                input_dict = {"obs": {}}
                # if model2.use_conv:
                #     reshaped_board = np.expand_dims(board_p2, axis=(0, -1))
                # else:
                #     reshaped_board = np.reshape(board_p2, (1, board_p2.shape[0] * board_p2.shape[1]))
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board_p2
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model2.forward(input_dict, None, None)
                action_prob = [
                    np.exp(single_log) / sum(np.exp(action_logits[0]))
                    for single_log in action_logits[0]
                ]
                act = np.random.choice([0, 1, 2, 3, 4, 5, 6], 1, p=action_prob)[0]
                actions[player2] = act
                _, rew, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if done["__all__"]:
                game_over = True
                reward = rew["player2"]
                if rew["player1"] == 1.0:
                    p1_win += 1.0
        if discount_rate == 1:
            final_value += reward
        else:
            final_value += (discount_rate**timestep) * reward
    final_value = final_value / number_of_games
    p1_win_rate = p1_win / number_of_games
    return final_value, p1_win_rate
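# The stochastic evaluation functions in this file all convert the model's
# logits into a probability distribution with a plain softmax and then sample
# an action from it. A numerically stable sketch of that step (illustrative
# only; the functions above and below inline the computation instead of
# calling a helper like this):
def sample_action_from_logits(logits, rng=np.random):
    """Sample an action index from softmax(logits)."""
    logits = np.asarray(logits, dtype=np.float64)
    shifted = logits - np.max(logits)  # subtract the max for numerical stability
    probs = np.exp(shifted) / np.sum(np.exp(shifted))
    return rng.choice(len(probs), p=probs)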
def model_vs_model_connect3_generate_data(model1,
                                          model2,
                                          number_of_games,
                                          discount_rate,
                                          randomize=True,
                                          number_of_stochastic_moves=0):
    """
    Generate game data from games between model1 and model2 on the connect-3
    board; each game is encoded as a string (starting-player prefix followed
    by the sequence of chosen actions), with the board seen from model1's
    point of view.
    """
    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    final_value = 0
    p1_win = 0
    # games encoded as strings
    games_list = []
    number_of_equal_games = 0
    for i in range(number_of_games):
        # timestep tracks how long a game lasts
        timestep = 0
        reward = 0
        game_over = False
        actions = {}
        encoded_game = []
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
            if starting_player == player1_ID:
                encoded_game.append("p1_")
            elif starting_player == player2_ID:
                encoded_game.append("p2_")
        else:
            starting_player = player1_ID
            encoded_game.append("p1_")
        game.reset(starting_player=starting_player, randomize=False)
        while not game_over:
            timestep += 1
            actual_player = game.current_player
            board = game.board
            board_p2 = game.board_p2
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model1.forward(input_dict, None, None)
                if timestep > number_of_stochastic_moves:
                    act = np.argmax(action_logits[0])
                else:
                    action_prob = [
                        np.exp(single_log) / sum(np.exp(action_logits[0]))
                        for single_log in action_logits[0]
                    ]
                    act = np.random.choice([0, 1, 2, 3, 4], 1, p=action_prob)[0]
                encoded_game.append(str(act))
                actions[player1] = act
                _, rew, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board_p2
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model2.forward(input_dict, None, None)
                if timestep > number_of_stochastic_moves:
                    act = np.argmax(action_logits[0])
                else:
                    action_prob = [
                        np.exp(single_log) / sum(np.exp(action_logits[0]))
                        for single_log in action_logits[0]
                    ]
                    act = np.random.choice([0, 1, 2, 3, 4], 1, p=action_prob)[0]
                encoded_game.append(str(act))
                actions[player2] = act
                _, rew, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if done["__all__"]:
                game_over = True
                game_str = ''.join(encoded_game)
                # add the encoded game to the list if it has not been seen yet
                if game_str in games_list:
                    number_of_equal_games += 1
                else:
                    games_list.append(game_str)
                reward = rew["player2"]
                if rew["player1"] == 1.0:
                    p1_win += 1.0
        if discount_rate == 1:
            final_value += reward
        else:
            final_value += (discount_rate**timestep) * reward
    print("The number of equal games is: " + str(number_of_equal_games))
    final_value = final_value / number_of_games
    p1_win_rate = p1_win / number_of_games
    return final_value, p1_win_rate, games_list
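# Each generated game is stored as a string: a "p1_"/"p2_" prefix naming the
# starting player, followed by one digit per move (the chosen column), e.g.
# "p1_20112". A small decoding sketch (an assumption about downstream use,
# not part of the original pipeline):
def decode_game_string(game_str):
    """Split an encoded game into (starting_player, list_of_actions)."""
    starting_player, moves = game_str.split("_", 1)
    return starting_player, [int(c) for c in moves]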
def model_vs_model_connect3(model1,
                            model2,
                            number_of_games,
                            discount_rate,
                            randomize=True,
                            number_of_stochastic_moves=0):
    """
    Play number_of_games between model1 and model2 on the connect-3 board and
    return the average (discounted) final reward from player 2's point of view
    together with player 1's win rate.
    """
    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    final_value = 0
    p1_win = 0
    for i in range(number_of_games):
        # timestep tracks how long a game lasts
        timestep = 0
        reward = 0
        game_over = False
        actions = {}
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
        else:
            starting_player = player1_ID
        game.reset(starting_player=starting_player)
        while not game_over:
            timestep += 1
            actual_player = game.current_player
            board = game.board
            board_p2 = game.board_p2
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model1.forward(input_dict, None, None)
                if timestep > number_of_stochastic_moves:
                    act = np.argmax(action_logits[0])
                else:
                    action_prob = [
                        np.exp(single_log) / sum(np.exp(action_logits[0]))
                        for single_log in action_logits[0]
                    ]
                    act = np.random.choice([0, 1, 2, 3, 4], 1, p=action_prob)[0]
                actions[player1] = act
                _, rew, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board_p2
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model2.forward(input_dict, None, None)
                if timestep > number_of_stochastic_moves:
                    act = np.argmax(action_logits[0])
                else:
                    action_prob = [
                        np.exp(single_log) / sum(np.exp(action_logits[0]))
                        for single_log in action_logits[0]
                    ]
                    act = np.random.choice([0, 1, 2, 3, 4], 1, p=action_prob)[0]
                actions[player2] = act
                _, rew, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if done["__all__"]:
                game_over = True
                reward = rew["player2"]
                if rew["player1"] == 1.0:
                    p1_win += 1.0
        if discount_rate == 1:
            final_value += reward
        else:
            final_value += (discount_rate**timestep) * reward
    final_value = final_value / number_of_games
    p1_win_rate = p1_win / number_of_games
    return final_value, p1_win_rate
def model_vs_minimax_connect3_stochastic(model,
                                         depth,
                                         number_of_games,
                                         checkpoint=None,
                                         logger=None,
                                         randomize=True):
    """
    Evaluate the model against the minimax algorithm on the connect-3 board.
    The model's action is sampled according to the distribution of its action
    probabilities.
    """
    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    model_name = model.name
    if logger:
        logger.info("**********" + str(model_name) + "_(X) VS (O)_MINIMAX_depth_" +
                    str(depth) + "**********")
    print("Starting Evaluation")
    for i in range(number_of_games):
        game_over = False
        actions = {}
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
        else:
            starting_player = player1_ID
        game.reset(starting_player=starting_player, randomize=False)
        while not game_over:
            actual_player = game.current_player
            board = game.board
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                # if model.use_conv:
                #     reshaped_board = np.expand_dims(board, axis=(0, -1))
                # else:
                #     reshaped_board = np.reshape(board, (1, board.shape[0] * board.shape[1]))
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model.forward(input_dict, None, None)
                action_prob = [
                    np.exp(single_log) / sum(np.exp(action_logits[0]))
                    for single_log in action_logits[0]
                ]
                act = np.random.choice([0, 1, 2, 3, 4], 1, p=action_prob)[0]
                actions[player1] = act
                _, _, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                act, _ = minimax_connect3(board, player2_ID, True, depth=depth)
                actions[player2] = act
                _, _, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if logger:
                logger.info("Game number " + str(i) + "/" + str(number_of_games))
                logger.info("Player " + str(actual_player + 1) + " actions: " + str(act))
                logger.info("\n" + repr(board))
                logger.info(
                    board_print(board, Connect3Config.HEIGHT, Connect3Config.WIDTH))
            if done["__all__"]:
                if logger:
                    logger.info("PLAYER " + str(game.winner + 1) + " WON...")
                    logger.info("CURRENT SCORE: " + str(game.score[player1]) + " VS " +
                                str(game.score[player2]))
                game_over = True
    # score = game.score[player1] / number_of_games + game.num_draws / (
    #     2 * number_of_games)
    print("Evaluation Over")
    elo_diff = compute_elo_difference(game.score[player1], game.num_draws,
                                      number_of_games)
    # print("\nplayer 1 score: " + str(game.score[player1]))
    # print("player 2 score: " + str(game.score[player2]))
    # print("number of draw: " + str(game.num_draws))
    # print("elo difference computed over " + str(number_of_games) +
    #       " games between the 2 algorithms is " + str(elo_diff))
    return elo_diff, game.score[player1], game.score[player2], game.num_draws
def model_vs_minimax_connect3(model,
                              depth,
                              number_of_games,
                              checkpoint=None,
                              logger=None,
                              randomize=True):
    """
    Evaluate the model against the minimax algorithm on the connect-3 board.
    The model always plays the action with the highest logit (greedy choice).
    """
    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    model_name = model.name
    if logger:
        logger.info("**********" + str(model_name) + "_(X) VS (O)_MINIMAX_depth_" +
                    str(depth) + "**********")
    print("Starting Evaluation")
    for i in range(number_of_games):
        game_over = False
        actions = {}
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
        else:
            starting_player = player1_ID
        game.reset(starting_player=starting_player, randomize=False)
        while not game_over:
            actual_player = game.current_player
            board = game.board
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model.forward(input_dict, None, None)
                act = np.argmax(action_logits[0])
                actions[player1] = act
                _, _, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                act, _ = minimax_connect3(board, player2_ID, True, depth=depth)
                actions[player2] = act
                _, _, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if logger:
                logger.info("Game number " + str(i) + "/" + str(number_of_games))
                logger.info("Player " + str(actual_player + 1) + " actions: " + str(act))
                logger.info("\n" + repr(board))
                logger.info(
                    board_print(board, Connect3Config.HEIGHT, Connect3Config.WIDTH))
            if done["__all__"]:
                if logger:
                    logger.info("PLAYER " + str(game.winner + 1) + " WON...")
                    logger.info("CURRENT SCORE: " + str(game.score[player1]) + " VS " +
                                str(game.score[player2]))
                game_over = True
    print("Evaluation Over")
    elo_diff = compute_elo_difference(game.score[player1], game.num_draws,
                                      number_of_games)
    return elo_diff, game.score[player1], game.score[player2], game.num_draws
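# compute_elo_difference is imported from elsewhere in the project. Based on
# the inline Elo computation used by the minimax-vs-random and
# minimax-vs-minimax functions below, it plausibly looks like this sketch
# (an assumption, not the actual implementation):
def compute_elo_difference_sketch(p1_score, num_draws, number_of_games):
    """score = wins/N + draws/(2N); elo_diff = -400 * log10(1/score - 1),
    capped at 400 when score >= 10/11 (the same cap the code below applies)."""
    score = p1_score / number_of_games + num_draws / (2 * number_of_games)
    if score >= 10 / 11:
        return 400
    return -400 * math.log(1 / score - 1, 10)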
def minimax_vs_random_elo(
    depth,
    number_of_games,
    logger,
):
    """
    Use the inverse of the Elo formula to compute the rating difference implied
    by a match result that is already known. It estimates the relative Elo
    between the random algorithm and a minimax algorithm.

    Elo formula:     expected_score = 1 / (1 + 10^(-elo_diff/400))
    Inverse formula: elo_diff = -400 * log10(1/expected_score - 1)

    INPUT:
        depth: int
            search depth of the minimax algorithm
        number_of_games: int
            number of games to play before computing the Elo difference
    RETURN:
        elo_diff: float
            Elo difference between the two players
    """
    game = Connect4Env(None)
    if logger:
        logger.info("**********MINIMAX_depth_" + str(depth) + "_(X) VS (O)_RANDOM" +
                    "**********")
    for i in tqdm(range(number_of_games)):
        game_over = False
        actions = {}
        starting_player = random.choice([player1_ID, player2_ID])
        game.reset(starting_player=starting_player)
        print("\nPlayer " + str(starting_player + 1) + " is starting")
        while not game_over:
            actual_player = game.current_player
            board = game.board
            if actual_player == player1_ID:
                act, action_values = minimax(board,
                                             player1_ID,
                                             True,
                                             depth=depth,
                                             return_distr=True)
                actions[player1] = act
                _, _, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                act = random.choice(game.get_moves(False))
                actions[player2] = act
                _, _, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if logger:
                logger.info("Game number " + str(i) + "/" + str(number_of_games))
                if actual_player == player1_ID:
                    logger.info("action distribution: " + str(action_values))
                logger.info("Player " + str(actual_player + 1) + " actions: " + str(act))
                logger.info("\n" + repr(board))
                logger.info(board_print(board))
            if done["__all__"]:
                logger.info("PLAYER " + str(game.winner + 1) + " WON...")
                logger.info("CURRENT SCORE: " + str(game.score[player1]) + " VS " +
                            str(game.score[player2]))
                game_over = True
    if game.score[player1] > game.score[player2]:
        score = game.score[player1] / number_of_games + game.num_draws / (
            2 * number_of_games)
    elif game.score[player1] < game.score[player2]:
        score = game.score[player2] / number_of_games + game.num_draws / (
            2 * number_of_games)
    elif game.score[player1] == game.score[player2]:
        return 0
    if score >= 10 / 11:
        elo_diff = 400
    else:
        elo_diff = -400 * math.log((1 / score - 1), 10)
    print("\nplayer 1 score: " + str(game.score[player1]))
    print("player 2 score: " + str(game.score[player2]))
    print("number of draw: " + str(game.num_draws))
    print("elo difference computed over " + str(number_of_games) +
          " games between the 2 algorithms is " + str(elo_diff))
    return elo_diff
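# Worked example of the inverse Elo formula above (an illustration): if player 1
# wins 75 of 100 games with no draws, score = 0.75 and
# elo_diff = -400 * log10(1/0.75 - 1) = 400 * log10(3) ≈ 191 Elo points
# in player 1's favour.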
def model_vs_minimax(model,
                     depth,
                     number_of_games,
                     checkpoint=None,
                     logger=None,
                     randomize=True):
    """
    Use the inverse of the Elo formula to compute the rating difference implied
    by a match result that is already known. It estimates the relative Elo
    between a custom model and a minimax algorithm.

    Elo formula:     expected_score = 1 / (1 + 10^(-elo_diff/400))
    Inverse formula: elo_diff = -400 * log10(1/expected_score - 1)

    INPUT:
        model: tensorflow model
        checkpoint: path to the checkpoint of the model to use
    """
    game = Connect4Env(None)
    model_name = model.name
    if logger:
        logger.info("**********" + str(model_name) + "_(X) VS (O)_MINIMAX_depth_" +
                    str(depth) + "**********")
    print("Starting Evaluation")
    for i in range(number_of_games):
        game_over = False
        actions = {}
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
        else:
            starting_player = player1_ID
        game.reset(starting_player=starting_player)
        while not game_over:
            actual_player = game.current_player
            board = game.board
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model.forward(input_dict, None, None)
                act = np.argmax(action_logits[0])
                actions[player1] = act
                _, _, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                act, _ = minimax(board, player2_ID, True, depth=depth)
                actions[player2] = act
                _, _, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if logger:
                logger.info("Game number " + str(i) + "/" + str(number_of_games))
                logger.info("Player " + str(actual_player + 1) + " actions: " + str(act))
                logger.info("\n" + repr(board))
                logger.info(board_print(board))
            if done["__all__"]:
                logger.info("PLAYER " + str(game.winner + 1) + " WON...")
                logger.info("CURRENT SCORE: " + str(game.score[player1]) + " VS " +
                            str(game.score[player2]))
                game_over = True
    print("Evaluation Over")
    elo_diff = compute_elo_difference(game.score[player1], game.num_draws,
                                      number_of_games)
    return elo_diff, game.score[player1], game.score[player2], game.num_draws
def minimax_vs_minimax_connect3_elo(depth1, depth2, number_of_games, logger=None):
    """
    Use the inverse of the Elo formula to compute the rating difference implied
    by a match result that is already known. It estimates the relative Elo
    between two minimax algorithms of different depths.

    Elo formula:     expected_score = 1 / (1 + 10^(-elo_diff/400))
    Inverse formula: elo_diff = -400 * log10(1/expected_score - 1)
    """
    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    if logger:
        logger.info("**********MINIMAX_depth_" + str(depth1) +
                    "_(X) VS (O)_MINIMAX_depth_" + str(depth2) + "**********")
    for i in tqdm(range(number_of_games)):
        game_over = False
        actions = {}
        starting_player = random.choice([player1_ID, player2_ID])
        game.reset(starting_player=starting_player)
        print("\nPlayer " + str(starting_player + 1) + " is starting")
        while not game_over:
            actual_player = game.current_player
            board = game.board
            if actual_player == player1_ID:
                act, _ = minimax_connect3(board, player1_ID, True, depth=depth1)
                actions[player1] = act
                _, _, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                act, _ = minimax_connect3(board, player2_ID, True, depth=depth2)
                actions[player2] = act
                _, _, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if logger:
                logger.info("Game number " + str(i) + "/" + str(number_of_games))
                logger.info("Player " + str(actual_player + 1) + " actions: " + str(act))
                logger.info("\n" + repr(board))
                logger.info(
                    board_print(board, Connect3Config.HEIGHT, Connect3Config.WIDTH))
            if done["__all__"]:
                if logger:
                    logger.info("PLAYER " + str(game.winner + 1) + " WON...")
                    logger.info("CURRENT SCORE: " + str(game.score[player1]) + " VS " +
                                str(game.score[player2]))
                game_over = True
    if game.score[player1] > game.score[player2]:
        score = game.score[player1] / number_of_games + game.num_draws / (
            2 * number_of_games)
    elif game.score[player1] < game.score[player2]:
        score = game.score[player2] / number_of_games + game.num_draws / (
            2 * number_of_games)
    elif game.score[player1] == game.score[player2]:
        return 0
    if score >= 10 / 11:
        elo_diff = 400
    else:
        elo_diff = -400 * math.log((1 / score - 1), 10)
    print("\nplayer 1 score: " + str(game.score[player1]))
    print("player 2 score: " + str(game.score[player2]))
    print("number of draw: " + str(game.num_draws))
    print("elo difference computed over " + str(number_of_games) +
          " games between the 2 minimax of depth " + str(depth1) + " and " +
          str(depth2) + " is " + str(elo_diff))
    return elo_diff
input_dict["obs"]["state"] = board #reshaped_board input_dict["obs"]["action_mask"] = action_mask action_logits, _ = model.forward(input_dict, None, None) act = np.argmax(action_logits) if act in act_to_do: success += 1 num_of_configs = len(BOARD_CONFIGS) success_rate = success / num_of_configs return success_rate, success, num_of_configs if __name__ == "__main__": # check if the given board configuration and actions are valid: env = Connect4Env(None) for elem in BOARD_CONFIGS: if elem.startswith("win_in_2"): continue actions_to_take = BOARD_CONFIGS[elem][1] board_config = BOARD_CONFIGS[elem][0] for act in actions_to_take: env.reset() env.board = board_config env.current_player = 0 action = {"player1": act} obs, reward, done, info = env.step(action) if done["__all__"] != True: print("Configuration Error: " + str(elem) + ":") print("The configuration " + str(env) + " does not end in a winning move " + str(act))