def on_start(passed_room=None):
    """Start the game in a room (idempotent: a room only starts once).

    If no room is passed explicitly, the room is looked up from the
    requesting socket's sid. On the first start the room's clients are
    (optionally) shuffled, AI seats are collected, and a game Engine is
    run to completion.

    NOTE(review): reconstructed from whitespace-mangled source — the
    assumed nesting (everything below the `started` guard runs inside it)
    should be confirmed against version control.
    """
    if passed_room is not None:
        room = passed_room
    else:
        # Resolve the caller's room from its socket session id.
        room = rt.sids_to_rooms[request.sid]
    if not rt.game_rooms[room]["started"]:  # Don't allow multiple starts
        print("Starting")
        # broadcast_to_room(room) returns an emitter; tell all clients to start.
        broadcast_to_room(room)("", "start game")
        rt.game_rooms[room]["started"] = True
        # shuffle clients randomly
        clients_keys = list(rt.game_rooms[room]["clients"].keys())
        # random_keys are seat indices 0..n-1; shuffling them randomizes seating.
        random_keys = [i for i in range(len(rt.game_rooms[room]["clients"]))]
        if not keep_client_order:
            random.shuffle(random_keys)
        shuffled_clients = {}
        # Re-key clients by (shuffled) integer seat index.
        # NOTE(review): this replaces the original sid keys with int seats —
        # presumably what the Engine expects; verify downstream consumers.
        for i, k in enumerate(random_keys):
            shuffled_clients[k] = rt.game_rooms[room]["clients"][clients_keys[i]]
        rt.game_rooms[room]["clients"] = shuffled_clients
        # Seats whose client dict is flagged "ai" are played by the server.
        ai_players = [c for c in rt.game_rooms[room]["clients"] if rt.game_rooms[room]["clients"][c]["ai"]]
        print(rt.game_rooms[room]["clients"])
        # Blocking call: runs the whole game over the socket callbacks.
        engine = Engine(emit_to_client_in_room(room), broadcast_to_room(room), retrieve_response_in_room(room), nonlocal_ais=ai_players, n_players=len(rt.game_rooms[room]["clients"]), **parsed_args)
        winner = engine.run_game()
        # Shut the socket server down once the game has a winner.
        socketio.stop()
def benchmark(self, agent: Agent, n=100):
    """Benchmark *agent* (seat 0) for n games against each baseline opponent.

    Exploration (epsilon) and verbosity are switched off on the agent for
    the run; note they are not restored afterwards.

    Returns:
        dict mapping str(opponent) -> final win rate of *agent*.
    """
    stats = {}
    # bench marking against other Agents
    baselines = [
        TaxAgent(),
        IncomeAgent(),
        AdversarialAgent(),
        HonestAgent(),
        StrategicAgentV1(),
        TrickyAgent(),
    ]
    # remove epsilon for benchmarking
    agent.epsilon = 0
    agent.verbose = False
    for rival in baselines:
        wins = 0
        rival_name = str(rival)
        print(f"Benchmarking against {rival_name}...")
        for game_idx in range(n):
            match = Engine(local_ais={0: agent, 1: rival})
            if match.run_game() == 0:
                wins += 1
            win_rate = wins / (game_idx + 1)
            print("Win rate: ", win_rate)
        stats[rival_name] = win_rate
    print(stats)
    return stats
def evaluate_with_model(self, training_location, eval_model_location, n):
    """Play n games and return the seat-0 win rate.

    NOTE(review): training_location and eval_model_location are currently
    unused — both seats are plain IncomeAgents. Presumably placeholders for
    loading trained models; confirm intent before relying on this.
    """
    wins = 0
    for game_idx in range(n):
        match = Engine(local_ais={0: IncomeAgent(), 1: IncomeAgent()})
        if match.run_game() == 0:
            wins += 1
        print("WIN RATE: ", wins / (game_idx + 1))
    return wins / n
def self_train(self, training_location, hold_location):
    """Self-play loop: 10000 games of KerasAgent vs KerasAgent.

    Every 1000 episodes (including episode 0) the hold model is refreshed
    from the training model via update_hold.

    Returns:
        seat-0's overall win rate across the run.
    """
    n = 10000
    wins = 0
    for episode in range(n):
        print("TRAINING EPISODE: ", episode)
        # Periodically sync the frozen "hold" opponent with current weights.
        if episode % 1000 == 0:
            self.update_hold(hold_location, training_location)
        game = Engine(local_ais={0: KerasAgent(), 1: KerasAgent()})
        if game.run_game() == 0:
            wins += 1
        print("WIN RATE: ", wins / (episode + 1))
    return wins / n
def train_epoch(self, training_agent: Agent, op_agent: Agent):
    """Run self.epoch games of training_agent vs op_agent, then fit once.

    After the games, the agent's accumulated training data is stacked,
    rewards are discounted, and a single model.fit pass is run. The
    agent's per-game stats are reset before returning.

    Returns:
        (training_history, game_histories): the fit history object and the
        agent's recorded game histories.
        (Original docstring claimed "returns xs, rewards" — that did not
        match the code and has been corrected.)
    """
    for _ in range(self.epoch):
        engine = Engine(local_ais={0: training_agent, 1: op_agent})
        # Winner is irrelevant here: the reward signal is collected by the
        # agent itself, so the return value is deliberately discarded.
        engine.run_game()
    # ys is produced by get_training_data but unused in this training path.
    xs, _ys, rewards, game_histories = training_agent.get_training_data()
    xs = np.vstack(xs)
    discounted = np.vstack(self.discount_rewards(rewards))
    training_history = training_agent.model.fit(xs, discounted)
    # reset stats so the next epoch starts from a clean slate
    training_agent.reset()
    return training_history, game_histories
def run_match(agents, elos=None, randomize_order=True, n_iters=1000, use_default_elos=False):
    """Play n_iters games among *agents*, updating Elo ratings after each game.

    Seating is reshuffled per game when randomize_order is set. Win
    proportions are reported every 100 games and at the end.

    Args:
        agents: list of agents to pit against each other.
        elos: initial Elo ratings, parallel to agents; if None,
            use_default_elos must be True (each agent starts at DEFAULT_ELO).
        randomize_order: shuffle seating before every game.
        n_iters: number of games to play.
        use_default_elos: permit elos=None by starting everyone at DEFAULT_ELO.

    Returns:
        the updated list of Elo ratings (parallel to agents).

    Raises:
        ValueError: if elos is None and use_default_elos is False.
    """
    if elos is None:
        if not use_default_elos:
            raise ValueError(
                "Must specify initial elos or specify to use default elos")
        elos = [DEFAULT_ELO] * len(agents)
    old_elos = list(elos)  # snapshot for the final diff report
    print("Agents:", str([str(agent) for agent in agents]).replace("'", ""))
    print("Old elos:", [round(elo) for elo in old_elos])
    agent_order = list(range(len(agents)))
    win_tracker = {idx: 0 for idx in range(len(agents))}
    for game_idx in range(n_iters):
        if randomize_order:
            random.shuffle(agent_order)
        # Seat j is played by whichever agent the shuffled order puts there.
        local_ais = {seat: agents[agent_order[seat]] for seat in range(len(agents))}
        engine = Engine(local_ais=local_ais)
        winner = engine.run_game()
        # Map the winning seat back to the agent that occupied it.
        win_tracker[agent_order[winner]] += 1
        if (game_idx + 1) % 100 == 0:
            print(f"Win proportions after game {game_idx + 1}:", [
                round(win_tracker[j] / (game_idx + 1), 3)
                for j in range(len(win_tracker))
            ])
        elos = calculate_new_elos(elos, agent_order[winner])
    print("Final win proportions:",
          [win_tracker[i] / n_iters for i in range(len(win_tracker))])
    print("New elos:", [round(elo) for elo in elos])
    elo_diffs = [new - old for new, old in zip(elos, old_elos)]
    elo_diffs = [("+" if diff >= 0 else "") + str(round(diff))
                 for diff in elo_diffs]
    print("Elo changes:", str(elo_diffs).replace("'", ""))
    return elos
local_ais = {0: a, 1: b} # print(a.model.hidden_state) # print(b.model.hidden_state) # for n, p in a.model.named_parameters(): # print(n, p.data) print("Starting Training") training_winners = [] n_train_iters = 10000 for i in range(n_train_iters): if i % 100 == 0: print("Training Iteration " + str(i + 1)) print("Win percentage this epoch:", len([r for r in training_winners[i - 100:i] if r == 0])) print([x for x in a.model.named_parameters()][-1]) e = Engine(local_ais=local_ais) winner = e.run_game() training_winners.append(winner) # print(a.model.hidden_state) # print(b.model.hidden_state) # time.sleep(2) # for name, p in a.model.named_parameters(): # print(name) # print(p.data) # time.sleep(2) # break a.train_model(winner) a.events = [] # for name, p in a.model.named_parameters():