def main(): winners = {} winners["1"] = 0 winners["-1"] = 0 # Collecting stats of the games nGames = 1000 # how many games? arr = np.zeros(nGames) for g in range(nGames): winner = Backgammon.play_a_game(commentary=False, net=new_agent) # print("game %i finished", g) # print("winner is ", winner) winners[str(winner)] += 1 arr[g] = winner # print("this is hw") # print(new_agent.val_func_nn.hidden_weights) # print("this is theta") # print(new_agent.policy_nn.theta) if (g % 50 == 0): print(new_agent.torch_nn_policy.theta) # print(winners) file = open('Failed.py', 'w') file.write(np.array_str(arr)) file.close() print("Out of", nGames, "games,") print("player", 1, "won", winners["1"], "times and") print("player", -1, "won", winners["-1"], "times")
def main(): winners = {} winners["1"] = 0 winners["-1"] = 0 # Collecting stats of the games nGames = 10000 # how many games? arr = np.zeros(nGames) for g in tqdm(range(nGames)): # w=new_agent.actor.theta # print(w) ###Zero eligibility traces (according to psudo code) agent.actor.zero_el() agent.critic.zero_el() winner = Backgammon.play_a_game(commentary=False, net=agent) winners[str(winner)] += 1 arr[g] = winner # if(g % 100 == 0): # print(new_agent.torch_nn_policy.theta) # print(winners) # ##Save the agent file_net = open('saved_net_one_2', 'wb') pickle.dump(agent, file_net) file_net.close() print("Out of", nGames, "games,") print("player", 1, "won", winners["1"], "times and") print("player", -1, "won", winners["-1"], "times")
def main():
    ranges = 1
    winners = {}
    winners["1"] = 0
    winners["-1"] = 0
    # Collecting stats of the games
    nGames = 1000  # how many games?
    arr = np.zeros(nGames)
    for g in tqdm(range(nGames)):
        # Zero eligibility traces (according to the pseudocode)
        winner = Backgammon.play_a_game(commentary=False, net=agent, train=train)
        winners[str(winner)] += 1
        arr[g] = winner
        if g % 10 == 0:
            print(agent.actor.theta)
            k = winners["1"]
            # small epsilon avoids division by zero on the first iteration
            print("winrate is %f" % (k / (g + 0.00000001)))
    # print(winners)
    # Save the agent's policy parameters
    if train is True:
        with open('saved_net_one', 'wb') as file_net:
            pickle.dump(agent.actor.theta, file_net)
    print("Out of", ranges, nGames, "games,")
    print("player", 1, "won", winners["1"], "times and")
    print("player", -1, "won", winners["-1"], "times")
def evaluate(agent, evaluation_agent, n_eval, n_games):
    wins = 0
    for i in range(n_eval):
        winner, board = Backgammon.play_a_game(agent, evaluation_agent)
        wins += int(winner == 1)
    winrate = round(wins / n_eval * 100, 3)
    print("Win-rate after training for " + str(n_games) + " games: " + str(winrate) + "%")
    return winrate
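
# plot_perf() is called by train() below but is not defined in these snippets.
# A minimal hypothetical sketch, assuming it only plots the recorded win-rates
# against pubeval over the course of training:
def plot_perf(winrates):
    import matplotlib.pyplot as plt

    # One point per evaluation round (every n_epochs games).
    plt.plot(winrates)
    plt.xlabel("evaluation round")
    plt.ylabel("win-rate vs pubeval (%)")
    plt.title("Self-play training performance")
    plt.show()
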
def train(n_games=200_000, n_epochs=5000, n_eval=1000):
    # Train kotra by self-play, pausing every n_epochs games to measure the
    # win-rate against pubeval.
    agent = kotra
    evaluation_agent = pubeval
    winrates = []
    for g in range(n_games):
        if g % n_epochs == 0 and g != 0:
            winrate = evaluate(agent, evaluation_agent, n_eval, n_games=g)
            winrates.append(winrate)
        winner, board = Backgammon.play_a_game(agent, agent, train=True, train_config={'g': g})
        # Credit both sides of the self-play game: the final board as seen by
        # player 1, and the flipped board as seen by player -1.
        agent.game_over_update(board, int(winner == 1))
        agent.game_over_update(kotra.flip_board(board), int(winner == -1))
    plot_perf(winrates)


# ----- main -----
train()