def get_win_rate(agent1, agent2, environment: str, configuration=None, episodes=100):
    # Avoid a mutable default argument; fall back to an empty configuration.
    configuration = configuration or {}
    # Agent 1 goes first for (roughly) half the episodes.
    rewards = evaluate(
        environment=environment,
        agents=[agent1, agent2],
        configuration=configuration,
        num_episodes=episodes // 2,
    )
    # Agent 2 goes first for the rest; swap the reward pairs so that
    # agent 1's reward always comes first.
    rewards += [[b, a] for [a, b] in evaluate(
        environment=environment,
        agents=[agent2, agent1],
        configuration=configuration,
        num_episodes=episodes - episodes // 2,
    )]
    agent_1_win_rate = np.round(rewards.count([1, -1]) / len(rewards), decimals=2)
    print(f"Agent 1 Win Rate: {agent_1_win_rate}")
    agent_2_win_rate = np.round(rewards.count([-1, 1]) / len(rewards), decimals=2)
    print(f"Agent 2 Win Rate: {agent_2_win_rate}")
    agent_1_invalid_games = rewards.count([None, 0])
    print(f"Agent 1 Invalid games: {agent_1_invalid_games}")
    agent_2_invalid_games = rewards.count([0, None])
    print(f"Agent 2 Invalid games: {agent_2_invalid_games}")
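# Minimal usage sketch (assumes `from kaggle_environments import evaluate`
# and `import numpy as np` in scope); "random" and "negamax" are built-in
# ConnectX agents, and the configuration below is the standard board.
get_win_rate("random", "negamax", "connectx",
             configuration={'rows': 6, 'columns': 7, 'inarow': 4},
             episodes=10)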
def get_win_percentage(agent1, agent2, n_rounds=50):
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    # Agent 1 goes first (roughly) half the time
    outcomes = evaluate("connectx", [agent1, agent2], config, [], n_rounds // 2)
    # Agent 2 goes first (roughly) half the time
    outcomes += [[b, a] for [a, b] in evaluate("connectx", [agent2, agent1], config, [], n_rounds - n_rounds // 2)]
    exercise_agent_win_percentage = np.round(outcomes.count([1, -1]) / len(outcomes), 2)
    print(f"Your Agent's Win Percentage (in {n_rounds} game rounds):", exercise_agent_win_percentage)
    print(f"Tutorial Agent's Win Percentage (in {n_rounds} game rounds):", np.round(outcomes.count([-1, 1]) / len(outcomes), 2))
    return exercise_agent_win_percentage
def get_win_percentages(agent1, agent2, n_rounds=100):
    # Use default Connect Four setup
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    # Agent 1 goes first (roughly) half the time
    outcomes = evaluate("connectx", [agent1, agent2], config, [], n_rounds // 2)
    # Agent 2 goes first (roughly) half the time
    outcomes += [[b, a] for [a, b] in evaluate("connectx", [agent2, agent1], config, [], n_rounds - n_rounds // 2)]
    print("Agent 1 Win Percentage:", np.round(outcomes.count([1, -1]) / len(outcomes), 2))
    print("Agent 2 Win Percentage:", np.round(outcomes.count([-1, 1]) / len(outcomes), 2))
    print("Number of Invalid Plays by Agent 1:", outcomes.count([None, 0]))
    print("Number of Invalid Plays by Agent 2:", outcomes.count([0, None]))
def get_win_percentages_and_score(agent1, agent2, n_rounds=100, silent=False):
    # Use default Connect Four setup
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    # Agent 1 goes first (roughly) half the time
    outcomes = evaluate("connectx", [agent1, agent2], config, [], n_rounds // 2)
    # Agent 2 goes first (roughly) half the time
    outcomes += [[b, a] for [a, b] in evaluate("connectx", [agent2, agent1], config, [], n_rounds - n_rounds // 2)]
    if not silent:
        print("Agent 1 Win Percentage:", np.round(outcomes.count([1, -1]) / len(outcomes), 2))
        print("Agent 2 Win Percentage:", np.round(outcomes.count([-1, 1]) / len(outcomes), 2))
        print("Number of Invalid Plays by Agent 1:", outcomes.count([None, 0]))
        print("Number of Invalid Plays by Agent 2:", outcomes.count([0, None]))
        print("Number of Draws (in {} game rounds):".format(n_rounds), outcomes.count([0, 0]))
    # Score: 3 points per win, 1 point per draw.
    return 3 * outcomes.count([1, -1]) + outcomes.count([0, 0])
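# Hedged usage sketch: the returned score (3 per win, 1 per draw) makes it
# easy to rank several candidates quietly. The agent paths below are
# hypothetical, not from the original.
candidates = ["agent_a.py", "agent_b.py"]  # hypothetical paths
scores = {c: get_win_percentages_and_score(c, "random", n_rounds=20, silent=True)
          for c in candidates}
print(max(scores, key=scores.get), "scored best against the random baseline")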
def one_against_three(agents):
    n_agents = len(agents)
    # np.int was removed in NumPy 1.24; use the builtin int dtype instead.
    scores = np.zeros((n_agents, n_agents), dtype=int)
    print("Simulation of battles. It can take some time...")
    for ind_1 in range(n_agents):
        for ind_2 in range(n_agents):
            print(f"LOG: {agents[ind_1]} vs 3 X {agents[ind_2]}", end="\r")
            current_score = evaluate(
                "hungry_geese",
                [
                    agents[ind_1],
                    agents[ind_2],
                    agents[ind_2],
                    agents[ind_2],
                ],
                num_episodes=100,
            )
            # Count the episodes in which seat 0 (the lone agent) got the
            # highest reward.
            episode_winners = np.argmax(current_score, axis=1)
            episode_winner_counts = collections.Counter(episode_winners)
            scores[ind_1, ind_2] = episode_winner_counts.get(0, 0)
    print()
    return scores
def one_on_one_with_two_simple(agents):
    n_agents = len(agents)
    scores = np.zeros((n_agents, n_agents), dtype=int)  # np.int is deprecated; use int
    print("Simulation of battles. It can take some time...")
    for ind_1 in range(n_agents):
        for ind_2 in range(ind_1 + 1, n_agents):
            print(
                f"LOG: {agents[ind_1]} vs {agents[ind_2]} vs 2 X simple_toward",
                end="\r"
            )
            current_score = evaluate(
                "hungry_geese",
                [
                    agents[ind_1],
                    agents[ind_2],
                    "simple_toward.py",
                    "simple_toward.py",
                ],
                num_episodes=100,
            )
            episode_winners = np.argmax(current_score, axis=1)
            episode_winner_counts = collections.Counter(episode_winners)
            scores[ind_1, ind_2] = episode_winner_counts.get(0, 0)
            scores[ind_2, ind_1] = episode_winner_counts.get(1, 0)
    print()
    return scores
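# Sketch of driving the two hungry_geese helpers above (assumes
# `import collections`, `import numpy as np`, and that the listed agent
# files exist; "my_goose.py" is a placeholder name, not from the original).
agents = ["greedy", "my_goose.py"]
head_to_head = one_on_one_with_two_simple(agents)
# head_to_head[i, j] counts episodes agents[i] won when seated with
# agents[j] and two simple_toward.py geese.
print(head_to_head)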
def eval():
    scores = evaluate('hungry_geese',
                      ['greedy', 'submission.py', 'greedy', 'greedy'],
                      num_episodes=100)
    # Tally how often each of the four seats ends with the highest reward.
    scoreboard = [0, 0, 0, 0]
    for score in scores:
        winner = np.argmax(score)
        scoreboard[winner] += 1
    print(scores)
    print(scoreboard)
    print()
def winPercentage(self, episode):
    # print("vs " + str(self.trainer.enemy))
    env = make("connectx", debug=True)
    env.render()
    env.reset()
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    outcomes = evaluate("connectx", [self.agent, self.trainer.enemy],
                        config, [], self.rounds // 2)
    # Agent 2 goes first (roughly) half the time
    outcomes += [[b, a] for [a, b] in evaluate(
        "connectx", [self.trainer.enemy, self.agent],
        config, [], self.rounds - self.rounds // 2)]
    # Log both win rates to the trainer's TensorBoard writer.
    self.trainer.writer.add_scalar(
        'win_percentage_agent',
        np.round(outcomes.count([1, -1]) / len(outcomes), 2), episode)
    self.trainer.writer.add_scalar(
        'win_percentage_random',
        np.round(outcomes.count([-1, 1]) / len(outcomes), 2), episode)
def compare_agents(env, agent1, agent2, num_episodes=10):
    """
    Args:
        env: the kaggle_environments-made env to run in
        agent1 (str|kaggle): either a Kaggle standard agent loaded with
            load_agent or a path to your own custom agent
        agent2 (str): as above, to compete with agent1
        num_episodes: how many games to average over
    """
    rewards = ke.evaluate("connectx", [agent1, agent2], num_episodes=num_episodes)
    try:
        mean_rewards = np.mean(rewards, axis=0)
    except TypeError as te:
        raise TypeError(f"{te}. A None reward likely means your submission"
                        f" file isn't runnable, i.e. has an error in it.") from te
    print("mean reward of agent1 vs agent2:", mean_rewards[0], ":", mean_rewards[1])
    return mean_rewards
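# Hedged usage sketch (assumes `import kaggle_environments as ke` and
# `import numpy as np`; "submission.py" is a placeholder path). Note the
# helper ignores `env` as written and always evaluates on "connectx".
env = ke.make("connectx", debug=True)
mean_rewards = compare_agents(env, "submission.py", "negamax", num_episodes=10)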
def get_result(match_settings):
    start = datetime.now()
    outcomes = kaggle_environments.evaluate(
        'rps',
        [match_settings[0], match_settings[1]],
        num_episodes=match_settings[2],
        configuration={'debug': True})
    won, lost, tie, avg_score = 0, 0, 0, 0.
    for outcome in outcomes:
        score = outcome[0]
        if score > 0:
            won += 1
        elif score < 0:
            lost += 1
        else:
            tie += 1
        avg_score += score
    elapsed = datetime.now() - start
    opponent_name = os.path.basename(match_settings[1])
    opponent_name = os.path.splitext(opponent_name)[0]
    print(
        f'... vs {opponent_name:<30} --- {won:2d}/{tie:2d}/{lost:2d} --- {avg_score}'
    )
    return match_settings[1], won, lost, tie, elapsed, float(avg_score) / float(match_settings[2])
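# Hedged usage sketch: match_settings is (my_agent, opponent_path,
# num_episodes); the paths below are placeholders, not from the original.
opponent, won, lost, tie, elapsed, avg = get_result(
    ('my_agent.py', 'opponents/statistical.py', 10))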
# ** 'signs': int
# ** 'tieRewardThreshold': int

# Making the environment
configuration = {"actTimeout": 1, "agentTimeout": 60, "runTimeout": 1200}
env = kg.make("rps", debug=True, configuration=configuration)

# Making agents from classes
ewa_agent = EWAAgent(sample_mode=False)
ewa_agent_sample = EWAAgent(sample_mode=True)

# Loading agent from file
random_agent = "main.py"

# Evaluate
agents = [ewa_agent_sample.play, ewa_agent.play]
steps = 1000
num_episodes = 10
results = kg.evaluate('rps', agents, configuration, num_episodes=num_episodes)
agent_0_wins = [1 if rewards[0] > rewards[1] else 0 for rewards in results]
agent_0_draws = [1 if rewards[0] == rewards[1] else 0 for rewards in results]
print(f"""(Agent 0) Results.
******Wins: {sum(agent_0_wins) / num_episodes * 100:.2f}% of episodes
Draws: {sum(agent_0_draws) / num_episodes * 100:.2f}%""")
print(f"(Agent 0) Average rewards: {sum(rewards[0] for rewards in results) / num_episodes:.2f}.")
def action_evaluate(args):
    return json.dumps(
        evaluate(args.environment, args.agents, args.configuration,
                 args.steps, args.episodes))
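# Sketch of the argparse wiring a CLI action like this presumably sits
# behind (assumed, not from the original; flag names simply mirror the
# attribute names the function reads).
import argparse
import json
from kaggle_environments import evaluate

parser = argparse.ArgumentParser()
parser.add_argument("--environment", default="connectx")
parser.add_argument("--agents", nargs="+", default=["random", "random"])
parser.add_argument("--configuration", type=json.loads, default={})
parser.add_argument("--steps", type=json.loads, default=[])
parser.add_argument("--episodes", type=int, default=1)
print(action_evaluate(parser.parse_args()))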
while not env.done:
    my_action = my_agent(observation, env.configuration)
    print("My Action", my_action)
    observation, reward, done, info = trainer.step(my_action)
    # env.render(mode="ipython", width=100, height=90, header=False, controls=False)
    env.render()

def mean_reward(rewards):
    return sum(r[0] for r in rewards) / float(len(rewards))

# Run multiple episodes to estimate its performance.
print("My Agent vs Random Agent:",
      mean_reward(evaluate("connectx", [my_agent, "random"], num_episodes=1000)))
print("My Agent vs Negamax Agent:",
      mean_reward(evaluate("connectx", [my_agent, "negamax"], num_episodes=10)))

import inspect
import os

os.chdir('E:\\Projects\\04_ConnectX')

def write_agent_to_file(function, file):
    with open(file, "a" if os.path.exists(file) else "w") as f:
        f.write(inspect.getsource(function))
        print(function, "written to", file)
# import submission module to test as agent function
from submissions.submission_REINFORCE import agent_function
from kaggle_environments import evaluate, make, utils

# Adapted to new reward structure
# Count number of wins. Draws are losses...
def mean_reward(rewards, first=True):
    return sum(1 if r[0 if first else 1] == 1 else 0 for r in rewards) / float(len(rewards))

# Calculate in each direction and vs each built-in agent
reward_random = mean_reward(evaluate("connectx", [agent_function, "random"], num_episodes=10))
print("Ours vs Random:", reward_random)
reward_negamax = mean_reward(evaluate("connectx", [agent_function, "negamax"], num_episodes=10))
print("Ours vs Negamax:", reward_negamax)
reward_random_inv = mean_reward(evaluate("connectx", ["random", agent_function], num_episodes=10), first=False)
print("Random vs Ours:", reward_random_inv)
reward_negamax_inv = mean_reward(evaluate("connectx", ["negamax", agent_function], num_episodes=10), first=False)
print("Negamax vs Ours:", reward_negamax_inv)
def eval(agent):
    results = evaluate("connectx", [agent, "negamax"], num_episodes=10)
    final_result = agent_reward(results)
    print("my Agent vs Negamax Agent:", final_result)
from tqdm import tqdm
from time import sleep
from kaggle_environments import make, evaluate

x = evaluate("rps", ["player_paper_lover.py", "player_random_player.py"],
             configuration={"episodeSteps": 1000})
print(x)
# import os
# print(os.listdir())
import sys
from kaggle_environments import evaluate

args = sys.argv
agents = [args[1], args[2]]
### Do not edit above
rewards = evaluate("rps", agents=agents, configuration={"episodeSteps": 1000})
### Do not edit below
print('\nD_MATCH_FINISHED')
print(rewards[0])
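# Assumed invocation (the script name is a placeholder):
#   python run_match.py agent_a.py agent_b.py
# The two positional arguments are agent file paths passed straight to
# evaluate(); rewards[0] is the [agent1, agent2] reward pair for the single
# episode, and the D_MATCH_FINISHED marker lets a wrapper parse the output.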
def test_can_evaluate():
    rewards = evaluate("connectx", ["random", "random"], num_episodes=2)
    assert (rewards[0][0] + rewards[0][1] == 1) and rewards[1][0] + rewards[1][1] == 1
env = make(ENV_NAME, configuration={"episodeSteps": NR_STEPS})
for i in list_agents:
    if not os.path.exists(i):
        raise ImportError('One of the agent paths is not well defined')

for ind_agent_1 in range(len(list_names)):
    for ind_agent_2 in range(ind_agent_1 + 1, len(list_names)):
        print(f"LOG: {list_names[ind_agent_1]} vs {list_names[ind_agent_2]}", end="\r")
        results = []
        # Play three evaluations per pairing and aggregate them.
        for i in range(3):
            current_score = evaluate(
                ENV_NAME,
                [list_agents[ind_agent_1], list_agents[ind_agent_2]],
                configuration={"episodeSteps": NR_STEPS}
            )
            # A None reward means the agent errored; score it as a draw.
            if current_score[0][0] is None:
                results.append(0)
            else:
                results.append(current_score[0][0])
        scores[ind_agent_1, ind_agent_2] = np.mean(results)
        scores[ind_agent_2, ind_agent_1] = -np.mean(results)
        min_scores[ind_agent_1, ind_agent_2] = min(results)
        min_scores[ind_agent_2, ind_agent_1] = -min(results)
        max_scores[ind_agent_1, ind_agent_2] = max(results)
        max_scores[ind_agent_2, ind_agent_1] = -max(results)
"nash_equilibrium", "markov_agent", "memory_patterns", # "multi_armed_bandit", "opponent_transition_matrix", "decision_tree_classifier", "statistical_prediction", ] list_agents = [agent_name + ".py" for agent_name in list_names] simulation_times = 10 scores = np.zeros((len(list_names), simulation_times), dtype=int) for i in range(simulation_times): for ind_agent_1 in range(len(list_names)): current_score = evaluate( "rps", ["multi_armed_bandit.py", list_agents[ind_agent_1]], configuration={"episodeSteps": 1000}) print(i, list_names[ind_agent_1], current_score[0][0]) if current_score[0][0] >= 20: add_score = 1 elif current_score[0][0] <= -20: add_score = -1 else: add_score = 0 scores[ind_agent_1, i] = add_score df_scores = pd.DataFrame(scores) df_scores.index = list_names print(df_scores.mean(axis=1)) # print(df_scores.std(axis=1)) print(df_scores.median(axis=1))
def test_wins_against_4_randoms():
    scores = evaluate("halite", [agent, "random", "random", "random"],
                      num_episodes=1, configuration={"agentExec": "LOCAL"})
    # evaluate() returns one reward list per episode; compare our agent's
    # reward against the whole field (the original `scores[0] == max(scores)`
    # was trivially true with a single episode).
    episode_scores = scores[0]
    assert episode_scores[0] == max(episode_scores)
def test_can_evaluate():
    rewards = evaluate("tictactoe", ["random", "reaction"], num_episodes=2)
    assert (rewards[0][0] + rewards[0][1] == 0) and rewards[1][0] + rewards[1][1] == 0
def test_wins_against_random():
    my_score, enemy_score = evaluate("halite", [agent, "random"], num_episodes=1,
                                     configuration={"agentExec": "LOCAL"})[0]
    assert my_score > enemy_score
# These lines test it against another AI
# print("My Agent vs Random Agent:", mean_reward(evaluate("connectx", [my_agent, "random"], num_episodes=10)))
# print("My Agent vs Negamax Agent:", mean_reward(evaluate("connectx", [my_agent, "negamax"], num_episodes=10)))

# Play your agent
env.play([my_agent, None], width=500, height=450)

def write_agent_to_file(function, file):
    with open(file, "a" if os.path.exists(file) else "w") as f:
        f.write(inspect.getsource(function))
        print(function, "written to", file)

def mean_reward(rewards):
    # Agent 1's share of the total reward (assumes no None rewards).
    return sum(r[0] for r in rewards) / sum(r[0] + r[1] for r in rewards)

write_agent_to_file(my_agent, "submission.py")

out = sys.stdout
submission = utils.read_file("submission.py")
agent = utils.get_last_callable(submission)
sys.stdout = out

env = make("connectx", debug=True)
# Run multiple episodes to estimate its performance.
print("My Agent vs Random Agent:",
      mean_reward(evaluate("connectx", [agent, "random"], num_episodes=10)))
print("My Agent vs Negamax Agent:",
      mean_reward(evaluate("connectx", [agent, "negamax"], num_episodes=10)))
# Validate the submission by playing it against itself; without this run
# the status check below would never report "DONE".
env.run([agent, agent])
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")
import os
from kaggle_environments import make, register, evaluate
from kaggle_environments.envs.football import football

env = make("football", debug=True,
           configuration={"scenario_name": "test_example_multiagent",
                          "team_1": 1, "team_2": 0,
                          "episodeSteps": 100,
                          "render": False, "save_video": True})
print(env.name, env.version)
print("Default Agents: ", *env.agents)
env.run(["run_right", "run_left"])
print("Video: %s" % env.football_video_path)
football.cleanup(env)
print("Logs stored in /tmp/football/%s" % env.id)

configuration = {"scenario_name": "test_example_multiagent",
                 "team_1": 1, "team_2": 0,
                 "episodeSteps": 100,
                 "render": False, "save_video": True}
agents = ["run_right", "run_left"]
rewards = evaluate("football", agents, configuration, steps=[], num_episodes=10)
## Broken: evaluate looks only at rewards from the last step.
# (or should we finish after a scored goal??)
print(rewards)
football.cleanup_all()