def produce_data(when_to_start, dump_p, n_games, filename, folder):
    """Run an all-vs-all tournament and dump the collected game states.

    Plays every pairing from a fixed agent roster for ``n_games``
    deterministic games, collecting only states from the middle of each
    game (controlled by ``when_to_start`` and ``dump_p``), then writes
    the collected states to ``filename`` inside ``folder``.
    """
    roster = [RandomAgent(), GreedyAgentBoost(), MinMaxAgent()]
    arena = ArenaMultiThread()
    arena.start_collecting_states()
    arena.collect_only_from_middle_game(when_to_start, dump_p)
    arena.all_vs_all('deterministic', roster, n_games)
    arena.dump_collected_states(filename, folder)
def run_baseline_comparison_v1():
    """Compare baseline agents in an all-vs-all tournament.

    Runs every agent pairing ``n_games`` times per seating order, then —
    on the main thread only — prints the results, exports wins /
    victory-point / reward tables to CSV, and saves a heatmap image for
    each metric under ``reports/``.
    """
    agent1 = GreedyAgentBoost(weight=[100, 1.5, 2.5, 1, 0.1])
    agent2 = GreedyAgentBoost(
        weight=[0.99953495, 0.02010871, 0.02010487, 0.01095619, 0.00113329])
    agent3 = MinMaxAgent(weight=[100, 2, 2, 1, 0.1])
    agent7 = GreedySearchAgent(
        depth=7,
        weight=[0.99953495, 0.02010871, 0.02010487, 0.01095619, 0.00113329],
        decay=0.95)

    multi_arena = ArenaMultiThread()
    n_games = 1
    list_of_agents = [agent1, agent2, agent3, agent7]
    results = multi_arena.all_vs_all(list_of_agents, n_games)

    # NOTE(review): reporting is gated on the main thread so only one
    # process writes files — confirm the intended extent of this gate.
    if main_thread:
        print(' \n \n {}'.format(results.to_pandas()))
        print('\n \n \n')
        print(results)

        # DataFrame.to_csv(path) writes the file and returns None, so the
        # results are not bound to throwaway variables.
        results.to_pandas(param='wins').to_csv('wins.csv')
        results.to_pandas(param='victory_points').to_csv('victory_points.csv')
        results.to_pandas(param='reward').to_csv('reward.csv')

        # Each pair plays 2 * n_games games (both seating orders).
        plt.title('Average win rate over {} games per pair:'.format(2 * n_games))
        results.create_heatmap(param='wins', average=True)
        plt.savefig('reports/wins.png')
        plt.clf()

        plt.title('Average reward over {} games per pair:'.format(2 * n_games))
        results.create_heatmap('reward', average=True)
        plt.savefig('reports/reward.png')
        plt.clf()

        plt.title('Average victory points over {} games per pair:'.format(
            2 * n_games))
        results.create_heatmap('victory_points', average=True)
        plt.savefig('reports/victory_points.png')
        plt.clf()
def go():
    """Duel a pretrained-value-network MCTS agent against MinMax and print the result."""
    arena = MultiArena()
    model = StateEncoder(final_layer=ValueRegressor(),
                         data_transformer=IdentityTransformer())
    model.load_weights('archive/weights_tt1/epoch_41.h5')
    evaluator = ValueEvaluator(model=model, weights_file=None)
    mcts_player = SingleMCTSAgent(50, evaluator, 0.41,
                                  create_visualizer=False,
                                  show_unvisited_nodes=False,
                                  log_to_neptune=False)
    opponent = MinMaxAgent()
    outcome = arena.run_many_duels('deterministic', [mcts_player, opponent],
                                   10, 1, True)
    print(outcome)
def __init__(self,
             gems_encoder_dim: int = None,
             price_encoder_dim: int = None,
             profit_encoder_dim: int = None,
             cards_points_dim: int = None,
             cards_dense1_dim: int = None,
             cards_dense2_dim: int = None,
             board_nobles_dense1_dim: int = None,
             board_nobles_dense2_dim: int = None,
             full_board_dense1_dim: int = None,
             full_board_dense2_dim: int = None,
             player_points_dim: int = None,
             player_nobles_dim: int = None,
             full_player_dense1_dim: int = None,
             full_player_dense2_dim: int = None,
             final_layer=None,
             data_transformer=None,
             network_name: str = None):
    """Assemble the full-state Splendor value estimator.

    Builds encoders for the board and both players, concatenates their
    outputs, adds two dense layers and the supplied ``final_layer`` head,
    and compiles the resulting Keras model with Adam + MSE loss. The
    ``*_dim`` arguments are layer widths forwarded to the sub-encoders.
    """
    super().__init__()
    self.vectorizer = Vectorizer()
    self.final_layer = final_layer
    self.data_transformer = data_transformer

    # Record hyper-parameters for experiment tracking.
    # NOTE(review): the key 'data transormation' is misspelled in the
    # original; kept as-is because downstream consumers may read it.
    self.params['data transormation'] = self.data_transformer.name
    self.params['final layer name'] = self.final_layer.name
    # (the original assigned 'gems_encoder_dim' twice; duplicate removed)
    self.params['gems_encoder_dim'] = gems_encoder_dim
    self.params['price_encoder_dim'] = price_encoder_dim
    self.params['profit_encoder_dim'] = profit_encoder_dim
    self.params['cards_points_dim'] = cards_points_dim
    self.params['cards_dense1_dim'] = cards_dense1_dim
    self.params['cards_dense2_dim'] = cards_dense2_dim
    self.params['board_nobles_dense1_dim'] = board_nobles_dense1_dim
    self.params['board_nobles_dense2_dim'] = board_nobles_dense2_dim
    self.params['full_board_dense1_dim'] = full_board_dense1_dim
    self.params['full_board_dense2_dim'] = full_board_dense2_dim
    self.params['player_points_dim'] = player_points_dim
    self.params['player_nobles_dim'] = player_nobles_dim
    self.params['full_player_dense1_dim'] = full_player_dense1_dim
    self.params['full_player_dense2_dim'] = full_player_dense2_dim

    # Evaluation helpers: an arena plus opponents of increasing strength.
    self.arena = Arena()
    self.network_agent = ValueNNAgent(self)
    self.easy_opp = RandomAgent(distribution='first_buy')
    self.medium_opp = GreedyAgentBoost()
    self.hard_opp = MinMaxAgent()
    self.neptune_monitor = NeptuneMonitor()
    self.network_name = network_name

    # Sub-encoders; gems and price encoders are shared between the board
    # and player encoders.
    self.gems_encoder = GemsEncoder(gems_encoder_dim)
    self.price_encoder = PriceEncoder(price_encoder_dim)
    self.board_encoder = BoardEncoder(
        self.gems_encoder,
        ManyNoblesEncoder(price_encoder_dim,
                          board_nobles_dense1_dim,
                          board_nobles_dense2_dim),
        ManyCardsEncoder(MAX_CARDS_ON_BORD,
                         profit_encoder_dim,
                         price_encoder_dim,
                         cards_points_dim,
                         cards_dense1_dim,
                         cards_dense2_dim),
        full_board_dense1_dim,
        full_board_dense2_dim)
    self.player_encoder = PlayerEncoder(
        self.gems_encoder,
        self.price_encoder,
        ManyCardsEncoder(MAX_RESERVED_CARDS,
                         profit_encoder_dim,
                         price_encoder_dim,
                         cards_points_dim,
                         cards_dense1_dim,
                         cards_dense2_dim),
        player_points_dim,
        player_nobles_dim,
        full_player_dense1_dim,
        full_player_dense2_dim)

    # Functional graph: board + active player + other player -> value head.
    active_player_input = PlayersInputGenerator('active_').inputs
    other_player_input = PlayersInputGenerator('other_').inputs
    board_input = self.board_encoder.inputs
    self.inputs = board_input + active_player_input + other_player_input
    board_encoded = self.board_encoder(board_input)
    active_player_encoded = self.player_encoder(active_player_input)
    other_player_encoded = self.player_encoder(other_player_input)
    full_state = Concatenate(axis=-1)([board_encoded,
                                       active_player_encoded,
                                       other_player_encoded])
    # NOTE(review): the combined-state dense layers reuse the *player*
    # dense widths (full_player_dense*_dim) rather than dedicated sizes —
    # confirm this is intentional.
    full_state = Dense(full_player_dense1_dim, activation='relu')(full_state)
    final_state = Dense(full_player_dense2_dim, activation='relu')(full_state)
    result = self.final_layer(final_state)

    # self.layer exposes the penultimate representation; self.network adds
    # the final head. NOTE(review): both share the same Keras model name —
    # confirm that is harmless for serialization.
    self.layer = Model(inputs=self.inputs, outputs=final_state,
                       name='full_state_splendor_estimator')
    self.network = Model(inputs=self.inputs, outputs=result,
                         name='full_state_splendor_estimator')
    self.network.compile(Adam(), loss='mean_squared_error')
    self.params['Model name'] = 'Average pooling model'
    self.params['optimizer_name'] = 'Adam'
from agents.random_agent import RandomAgent
from agents.minmax_agent import MinMaxAgent
from agents.greedysearch_agent2 import GreedySearchAgent
from arena.arena import Arena

# Render a single deterministic duel: MinMax (depth 3) vs GreedySearch (depth 5).
environment_id = 'gym_splendor_code:splendor-v1'
fight_pit = Arena(environment_id)

goku = RandomAgent(distribution='first_buy')
goku2 = RandomAgent(distribution='uniform')
gohan = GreedySearchAgent(depth=5)
# Rebinds goku — the random agent bound above is not used in this duel.
goku = MinMaxAgent(name="MinMax", depth=3)
gohan.name = "g2"
goku.name = "g1"

fight_pit.run_one_duel([goku, gohan], render_game=True)
# Benchmark GreedyAgentBoost, then MinMax at increasing depth, against the
# three RandomAgent baselines (uses fight_pit and n_games from above).
gohan = GreedyAgentBoost(weight=[100, 2.5, 1.5, 1, 0.1])
print(gohan.name)
for dist in ('uniform', 'uniform_on_types', 'first_buy'):
    goku = RandomAgent(distribution=dist)
    print(fight_pit.run_many_duels([goku, gohan],
                                   number_of_games=n_games,
                                   shuffle_agents=True))

gohan = MinMaxAgent(name="MinMax", depth=2)
print(gohan.name)
for dist in ('uniform', 'uniform_on_types', 'first_buy'):
    goku = RandomAgent(distribution=dist)
    print(fight_pit.run_many_duels([goku, gohan],
                                   number_of_games=n_games,
                                   shuffle_agents=True))

gohan = MinMaxAgent(name="MinMax", depth=3)
print(gohan.name)