Esempio n. 1
0
def on_start(passed_room=None):
    """Start the game for a room (at most once) and run it to completion.

    The room is ``passed_room`` when one is given; otherwise it is looked up
    in ``rt.sids_to_rooms`` using the current request's session id. If the
    room is already flagged as started, nothing happens.

    Otherwise a "start game" message is broadcast, the room is flagged as
    started, and its clients are re-keyed by integer seat number (seat
    numbers are shuffled unless the module-level ``keep_client_order`` is
    truthy). An ``Engine`` is then built and run, and ``socketio.stop()`` is
    called once ``run_game()`` returns.
    """
    room = rt.sids_to_rooms[request.sid] if passed_room is None else passed_room
    if rt.game_rooms[room]["started"]:  # Don't allow multiple starts
        return

    print("Starting")
    broadcast_to_room(room)("",  "start game")
    game_room = rt.game_rooms[room]
    game_room["started"] = True

    # Assign every client a seat number in 0..n-1; the original client keys
    # are discarded and replaced by those seat numbers.
    original_clients = game_room["clients"]
    original_keys = list(original_clients.keys())
    seat_numbers = list(range(len(original_clients)))
    if not keep_client_order:
        random.shuffle(seat_numbers)
    seated_clients = {
        seat: original_clients[old_key]
        for seat, old_key in zip(seat_numbers, original_keys)
    }
    game_room["clients"] = seated_clients

    # Seats whose client entry is flagged as an AI.
    ai_players = [seat for seat, client in seated_clients.items() if client["ai"]]
    print(seated_clients)
    engine = Engine(emit_to_client_in_room(room), broadcast_to_room(room), retrieve_response_in_room(room),
                    nonlocal_ais=ai_players, n_players=len(seated_clients), **parsed_args)
    engine.run_game()  # the returned winner is not used here
    socketio.stop()
Esempio n. 2
0
 def benchmark(self, agent: Agent, n=100):
     """Play ``agent`` against a fixed set of opponents and report win rates.

     For each opponent, ``n`` games are run with ``agent`` registered under
     key 0 and the opponent under key 1; a game counts as a win when
     ``Engine.run_game()`` returns 0.

     Side effects: ``agent.epsilon`` is set to 0 and ``agent.verbose`` to
     False before the games start, and neither is restored afterwards.
     Progress and results are printed to stdout.

     Args:
         agent: the agent being benchmarked.
         n: number of games per opponent. When ``n <= 0`` no games are
             played and every win rate is reported as 0.0.

     Returns:
         dict mapping ``str(opponent)`` to the win rate (``wins / n``).
     """
     stats = {}
     # bench marking against other Agents
     opponents = [
         TaxAgent(),
         IncomeAgent(),
         AdversarialAgent(),
         HonestAgent(),
         StrategicAgentV1(),
         TrickyAgent(),
     ]
     # remove epsilon for benchmarking
     agent.epsilon = 0
     agent.verbose = False
     for opponent in opponents:
         opponent_name = str(opponent)
         print(f"Benchmarking against {opponent_name}...")
         wins = 0
         for _ in range(n):
             engine = Engine(local_ais={0: agent, 1: opponent})
             if engine.run_game() == 0:
                 wins += 1
         # Computed once after the loop. Previously win_rate was assigned
         # only inside the loop, so n <= 0 left it unbound at the print.
         win_rate = wins / n if n > 0 else 0.0
         print("Win rate: ", win_rate)
         stats[opponent_name] = win_rate
     print(stats)
     return stats
Esempio n. 3
0
 def evaluate_with_model(self, training_location, eval_model_location, n):
     """Play ``n`` games and return the fraction won by the player at key 0.

     NOTE(review): ``training_location`` and ``eval_model_location`` are
     accepted but never read here; both players are freshly constructed
     ``IncomeAgent`` instances for every game. Confirm whether models were
     meant to be loaded from those locations.

     Args:
         training_location: unused in this body.
         eval_model_location: unused in this body.
         n: number of games to play.

     Returns:
         ``wins / n``, or 0.0 when ``n <= 0`` (no games are played).
     """
     if n <= 0:
         # No games requested: avoid dividing by zero in the final ratio.
         return 0.0
     wins = 0
     for i in range(n):
         engine = Engine(local_ais={0: IncomeAgent(), 1: IncomeAgent()})
         winner = engine.run_game()
         if winner == 0:
             wins += 1
         # Running win rate over the games played so far.
         print("WIN RATE: ", wins / (i + 1))
     return wins / n
Esempio n. 4
0
    def self_train(self, training_location, hold_location, n=10000):
        """Run ``n`` self-play episodes and return the key-0 win rate.

        Every episode builds an ``Engine`` with two freshly constructed
        ``KerasAgent`` instances (keys 0 and 1) and runs one game. On every
        1000th episode, starting with episode 0,
        ``self.update_hold(hold_location, training_location)`` is called
        before the game. The episode index and the running win rate are
        printed each episode.

        Args:
            training_location: forwarded to ``self.update_hold``.
            hold_location: forwarded to ``self.update_hold``.
            n: number of episodes to run (defaults to the previously
                hard-coded 10000).

        Returns:
            ``wins / n``, or 0.0 when ``n <= 0`` (no episodes are run).
        """
        wins = 0
        for i in range(n):
            print("TRAINING EPISODE: ", i)
            if i % 1000 == 0:
                self.update_hold(hold_location, training_location)

            engine = Engine(local_ais={0: KerasAgent(), 1: KerasAgent()})
            winner = engine.run_game()
            if winner == 0:
                wins += 1
            print("WIN RATE: ", wins / (i + 1))
        # Guard the final ratio so a zero-episode run does not divide by zero.
        return wins / n if n > 0 else 0.0
Esempio n. 5
0
    def train_epoch(self, training_agent: Agent, op_agent: Agent):
        """
        Play ``self.epoch`` games with ``training_agent`` at key 0 and
        ``op_agent`` at key 1, then fit ``training_agent.model`` once on the
        data returned by ``training_agent.get_training_data()``.

        The fit targets are the rewards passed through
        ``self.discount_rewards``; the second element of the training data
        is not used. ``training_agent.reset()`` is called before returning.

        returns (training_history, game_histories): the value returned by
        ``training_agent.model.fit`` and the game histories element of the
        training data.
        """
        for _game in range(self.epoch):
            # The winner returned by run_game() is not needed here.
            Engine(local_ais={0: training_agent, 1: op_agent}).run_game()

        samples, _unused_ys, rewards, game_histories = (
            training_agent.get_training_data())
        features = np.vstack(samples)
        targets = np.vstack(self.discount_rewards(rewards))
        fit_history = training_agent.model.fit(features, targets)

        # reset stats
        training_agent.reset()
        return fit_history, game_histories
Esempio n. 6
0
def run_match(agents,
              elos=None,
              randomize_order=True,
              n_iters=1000,
              use_default_elos=False):
    """Play repeated games between ``agents`` and return their updated elos.

    Each iteration optionally shuffles the seating order, runs one game, and
    credits the win to the agent sitting in the winning seat. The elos are
    then replaced by ``calculate_new_elos(elos, winning_agent_index)``.
    Progress is printed every 100 games and a summary at the end.

    Args:
        agents: sequence of agents; positions in this sequence identify
            agents in ``elos`` and in the printed statistics.
        elos: initial elo per agent, in the same order as ``agents``. May be
            omitted only when ``use_default_elos`` is true.
        randomize_order: shuffle the seating order before every game.
        n_iters: number of games to play. With ``n_iters <= 0`` no games are
            played, the final win proportions are reported as 0.0 and the
            elos are returned unchanged.
        use_default_elos: start every agent at ``DEFAULT_ELO`` when ``elos``
            is None.

    Returns:
        The elos after the last game.

    Raises:
        ValueError: if ``elos`` is None and ``use_default_elos`` is false.
    """
    if elos is None:
        if not use_default_elos:
            raise ValueError(
                "Must specify initial elos or specify to use default elos")
        elos = [DEFAULT_ELO] * len(agents)
    old_elos = list(elos)
    print("Agents:", str([str(agent) for agent in agents]).replace("'", ""))
    print("Old elos:", [round(elo) for elo in old_elos])

    # agent_order[seat] is the index (into ``agents``) of the agent in that seat.
    agent_order = list(range(len(agents)))
    # win_counts[k] is the number of games won by agents[k].
    win_counts = [0] * len(agents)

    for i in range(n_iters):
        if randomize_order:
            random.shuffle(agent_order)
        local_ais = {
            seat: agents[agent_index]
            for seat, agent_index in enumerate(agent_order)
        }
        engine = Engine(local_ais=local_ais)
        # run_game() reports the winning seat; map it back to the agent.
        winning_agent = agent_order[engine.run_game()]
        win_counts[winning_agent] += 1
        if (i + 1) % 100 == 0:
            print(f"Win proportions after game {i+1}:",
                  [round(wins / (i + 1), 3) for wins in win_counts])

        elos = calculate_new_elos(elos, winning_agent)

    # Avoid dividing by zero in the summary when no games were played.
    games_played = max(n_iters, 1)
    print("Final win proportions:",
          [wins / games_played for wins in win_counts])
    print("New elos:", [round(elo) for elo in elos])
    elo_diffs = [new - old for new, old in zip(elos, old_elos)]
    elo_diffs = [("+" if diff >= 0 else "") + str(round(diff))
                 for diff in elo_diffs]
    print("Elo changes:", str(elo_diffs).replace("'", ""))
    return elos
Esempio n. 7
0
# Agent ``a`` is registered under key 0 and agent ``b`` under key 1.
local_ais = {0: a, 1: b}
# Disabled debug aid: dump hidden state and parameters before training.
# print(a.model.hidden_state)
# print(b.model.hidden_state)
# for n, p in a.model.named_parameters():
#     print(n, p.data)

print("Starting Training")
n_train_iters = 10000
training_winners = []  # winner of every game played so far, in order
for i in range(n_train_iters):
    if i % 100 == 0:
        # Progress report: count how many of the previous 100 games were won
        # by key 0 (the slice is empty on the very first iteration), then
        # show the last entry of a's named parameters.
        print(f"Training Iteration {i + 1}")
        print("Win percentage this epoch:",
              sum(1 for r in training_winners[i - 100:i] if r == 0))
        print(list(a.model.named_parameters())[-1])

    # Play one game with a fresh Engine and record who won.
    e = Engine(local_ais=local_ais)
    winner = e.run_game()
    training_winners.append(winner)

    # Disabled debug aid: inspect hidden state / parameters after each game.
    #     print(a.model.hidden_state)
    #     print(b.model.hidden_state)
    #     time.sleep(2)
    #     for name, p in a.model.named_parameters():
    #         print(name)
    #         print(p.data)
    #         time.sleep(2)
    #         break

    # Train agent a on the game's outcome, then clear its recorded events.
    a.train_model(winner)
    a.events = []
#     for name, p in a.model.named_parameters():