def main(argv=None):
    parser = ArgumentParser()
    subparsers = parser.add_subparsers(dest='command_name', required=True)

    # Run arguments
    play_parser = subparsers.add_parser("play")
    agent_group = play_parser.add_mutually_exclusive_group()
    agent_group.add_argument("--my-agent", type=str,
                             help="Play agent of name ... against three rule_based_agents")
    agent_group.add_argument("--agents", type=str, nargs="+",
                             default=["rule_based_agent"] * s.MAX_AGENTS,
                             help="Explicitly set the agent names in the game")
    play_parser.add_argument("--train", default=0, type=int, choices=[0, 1, 2, 3, 4],
                             help="First … agents should be set to training mode")
    play_parser.add_argument("--continue-without-training", default=False, action="store_true")
    # play_parser.add_argument("--single-process", default=False, action="store_true")
    play_parser.add_argument("--n-rounds", type=int, default=10,
                             help="How many rounds to play")
    play_parser.add_argument("--save-replay", const=True, default=False, action='store', nargs='?',
                             help='Store the game as .pt for a replay')
    play_parser.add_argument("--no-gui", default=False, action="store_true",
                             help="Deactivate the user interface and play as fast as possible.")

    # Replay arguments
    replay_parser = subparsers.add_parser("replay")
    replay_parser.add_argument("replay", help="File to load replay from")

    # Interaction
    for sub in [play_parser, replay_parser]:
        sub.add_argument("--fps", type=int, default=15,
                         help="FPS of the GUI (does not change game)")
        sub.add_argument("--turn-based", default=False, action="store_true",
                         help="Wait for key press until next movement")
        sub.add_argument("--update-interval", type=float, default=0.1,
                         help="How often agents take steps (ignored without GUI)")
        sub.add_argument("--log_dir", type=str,
                         default=os.path.dirname(os.path.abspath(__file__)) + "/logs")

        # Video?
        sub.add_argument("--make-video", default=False, action="store_true",
                         help="Make a video from the game")

    args = parser.parse_args(argv)

    if args.command_name == "replay":
        args.no_gui = False
        args.n_rounds = 1

    has_gui = not args.no_gui
    if has_gui:
        if not LOADED_PYGAME:
            raise ValueError("pygame could not be loaded, cannot run with GUI")
        pygame.init()

    # Initialize environment and agents
    if args.command_name == "play":
        agents = []
        if args.train == 0 and not args.continue_without_training:
            args.continue_without_training = True
        if args.my_agent:
            agents.append((args.my_agent, len(agents) < args.train))
            args.agents = ["rule_based_agent"] * (s.MAX_AGENTS - 1)
        for agent_name in args.agents:
            agents.append((agent_name, len(agents) < args.train))
        world = BombeRLeWorld(args, agents)
    elif args.command_name == "replay":
        world = ReplayWorld(args)
    else:
        raise ValueError(f"Unknown command {args.command_name}")

    # Emulate Windows process spawning behaviour under Unix (for testing)
    # mp.set_start_method('spawn')

    user_inputs = []

    # Start game logic thread
    t = threading.Thread(target=game_logic, args=(world, user_inputs, args),
                         name="Game Logic")
    t.daemon = True
    t.start()

    # Run one or more games
    for _ in tqdm(range(args.n_rounds)):
        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # First render
        if has_gui:
            world.render()
            pygame.display.flip()

        round_finished = False
        last_frame = time()
        user_inputs.clear()

        # Main game loop
        while not round_finished:
            if has_gui:
                # Grab GUI events
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        if world.running:
                            world.end_round()
                        world.end()
                        return
                    elif event.type == pygame.KEYDOWN:
                        key_pressed = event.key
                        if key_pressed in (pygame.K_q, pygame.K_ESCAPE):
                            world.end_round()
                        # Convert keyboard input into actions
                        if s.INPUT_MAP.get(key_pressed):
                            if args.turn_based:
                                user_inputs.clear()
                            user_inputs.append(s.INPUT_MAP.get(key_pressed))
                # Check once per frame (not only on key events), so that a
                # round that ends on its own also terminates this loop
                if not world.running:
                    round_finished = True

                # Render only once in a while
                if time() - last_frame >= 1 / args.fps:
                    world.render()
                    pygame.display.flip()
                    last_frame = time()
                else:
                    sleep_time = 1 / args.fps - (time() - last_frame)
                    if sleep_time > 0:
                        sleep(sleep_time)
            elif not world.running:
                round_finished = True
            else:
                # Non-gui mode, check for round end in 1ms
                sleep(0.001)

    world.end()
def main(argv=None):
    # Valid events
    EVENTS = [
        'MOVED_LEFT', 'MOVED_RIGHT', 'MOVED_UP', 'MOVED_DOWN', 'WAITED',
        'INVALID_ACTION', 'BOMB_DROPPED', 'BOMB_EXPLODED', 'CRATE_DESTROYED',
        'COIN_FOUND', 'COIN_COLLECTED', 'KILLED_OPPONENT', 'KILLED_SELF',
        'GOT_KILLED', 'OPPONENT_ELIMINATED', 'SURVIVED_ROUND'
    ]
    MOVEMENT = [
        'MOVED_LEFT', 'MOVED_RIGHT', 'MOVED_UP', 'MOVED_DOWN', 'WAITED',
        'INVALID_ACTION', 'BOMB_DROPPED'
    ]
    EVENTS_NO_MVNT = [
        'CRATE_DESTROYED', 'COIN_COLLECTED', 'KILLED_SELF', 'KILLED_OPPONENT',
        'OPPONENT_ELIMINATED', 'BOMB_DROPPED', 'COIN_FOUND', 'SURVIVED_ROUND'
    ]

    # Interesting stuff for plotting
    all_game_rewards_mean = []      # shape (#loaded checkpoints, #games per checkpoint)
    all_scores = []                 # shape (#loaded checkpoints, #games per checkpoint)
    all_steps_alive = []            # shape (#loaded checkpoints, #games per checkpoint)
    all_rewards = []                # shape (#loaded checkpoints, #games per checkpoint, #steps per game)
    all_rewards_steps = []          # shape (#checkpoints * #games * #steps,)
    all_events = defaultdict(list)  # event counts, shape (#loaded checkpoints, #games per checkpoint)
    all_ratios = defaultdict(list)  # ratios computed per checkpoint, i.e. shape (#loaded checkpoints,)
    epsilons = []

    play_parser = ArgumentParser()

    # Run arguments
    agent_group = play_parser.add_mutually_exclusive_group()
    agent_group.add_argument("--my-agent", type=str,
                             help="Play agent of name ... against three rule_based_agents")
    agent_group.add_argument("--agents", type=str, nargs="+",
                             default=["rule_based_agent"] * s.MAX_AGENTS,
                             help="Explicitly set the agent names in the game")
    play_parser.add_argument("--save-steps", type=int, nargs="+",
                             default=[0] * s.MAX_AGENTS,
                             help="Explicitly set the save point for the agent")
    play_parser.add_argument("--train", default=0, type=int, choices=[0, 1, 2, 3, 4],
                             help="First … agents should be set to training mode")
    play_parser.add_argument("--continue-without-training", default=False, action="store_true")
    play_parser.add_argument("--eval-start", default=0, type=int, help="First eval step")
    play_parser.add_argument("--eval-stop", default=0, type=int, help="Last eval step")
    play_parser.add_argument("--eval-step", default=1, type=int, help="Eval step size")
    play_parser.add_argument("--games", default=10, type=int,
                             help="Number of games to evaluate per checkpoint")
    play_parser.add_argument("--name", default='', type=str, help="Name of eval plots")
    # play_parser.add_argument("--single-process", default=False, action="store_true")

    args = play_parser.parse_args(argv)
    args.no_gui = True
    args.make_video = False
    args.log_dir = '/tmp'

    # Initialize environment and agents
    agents = []
    if args.train == 0 and not args.continue_without_training:
        args.continue_without_training = True
    if args.my_agent:
        agents.append((args.my_agent, len(agents) < args.train))
        args.agents = ["rule_based_agent"] * (s.MAX_AGENTS - 1)
    for agent_name in args.agents:
        agents.append((agent_name, len(agents) < args.train))

    fig, axs = plt.subplots(4, figsize=(15, 15))
    fig.tight_layout(pad=5)
    fig2, ax = plt.subplots(4, figsize=(15, 15))
    fig2.tight_layout(pad=6)
    ax_1_2 = ax[1].twinx()

    eval_name = ''
    for save_step_iter in tqdm(list(range(args.eval_start, args.eval_stop + 1, args.eval_step))):
        global seed
        seed = 0
        world = EvalWorld(args, agents)

        # Load the checkpoint for this iteration into every agent that supports it
        for i, a in enumerate(world.agents):
            args.save_steps[i] = save_step_iter
            if args.save_steps[i] >= 0:
                prev_cwd = os.getcwd()
                try:
                    if a.backend.runner.fake_self.agent.save_step < save_step_iter:
                        print("last checkpoint reached -> done")
                        return
                    os.chdir(f'./agent_code/{world.agents[0].backend.code_name}/')
                    a.backend.runner.fake_self.agent.load(args.save_steps[i])
                    a.backend.runner.fake_self.agent.evaluate_model = True
                except Exception as e:
                    print(f'{a.name} does not support loading!')
                    print(e)
                finally:
                    os.chdir(prev_cwd)

        try:
            if not args.name and not eval_name:
                eval_name = '_' + world.agents[0].backend.runner.fake_self.agent.checkpoint
                print(f'using the name {eval_name[1:]}')
            elif not eval_name:
                eval_name = '_' + args.name
                print(f'using the name {eval_name[1:]}')
            epsilons.append(world.agents[0].backend.runner.fake_self.agent.epsilon)
        except Exception:
            epsilons.append(0)

        score = []
        event_counter = defaultdict(list)
        step_counter = []
        reward_history = []
        move_history = deque(maxlen=2)

        for round_cnt in range(args.games):
            seed = round_cnt + 1
            score.append(0)
            step_counter.append(0)
            reward_history.append([0])
            for ev in EVENTS:
                event_counter[ev].append(0)

            if not world.running:
                world.ready_for_restart_flag.wait()
                world.ready_for_restart_flag.clear()
                world.new_round()

            # Main game loop
            round_finished = False
            dead = False
            while not round_finished:
                if world.running:
                    world.do_step('WAIT')
                    if not dead:
                        step_counter[-1] += 1
                        for ev in world.agents[0].events:
                            if ev == 'COIN_COLLECTED':
                                score[-1] += s.REWARD_COIN
                            elif ev == 'KILLED_OPPONENT':
                                score[-1] += s.REWARD_KILL
                            event_counter[ev][-1] += 1
                        move_history, reward = compute_reward(move_history, world.agents[0].events)
                        reward_history[-1].append(reward)
                        all_rewards_steps.append(reward)
                    dead = world.agents[0].dead
                else:
                    round_finished = True

        world.end()

        # General plotting values
        all_scores.append(score)
        all_steps_alive.append(step_counter)
        for ev in EVENTS:
            all_events[ev].append(event_counter[ev])
        try:
            crate_bomb_ratio = sum(event_counter["CRATE_DESTROYED"]) / sum(event_counter["BOMB_DROPPED"])
        except ZeroDivisionError:
            crate_bomb_ratio = 0
        all_ratios['crate-bomb-ratio'].append(crate_bomb_ratio)

        game_rewards_mean = [np.mean(x) for x in reward_history]
        all_rewards.append(reward_history)
        reward_colors = ['cornflowerblue', 'midnightblue', 'crimson']
        all_game_rewards_mean.append([np.mean(x) for x in reward_history])

        if len(all_steps_alive) > 1:
            #############################
            #######     plots     #######
            #############################
            '''
            for i, n in enumerate([1, 5, 50]):
                if i == 0:
                    axs[0].plot(all_rewards_steps, label='reward per step', color=reward_colors[i])
                else:
                    axs[0].plot(running_mean(all_rewards_steps, n), label=f'running mean: {n}', color=reward_colors[i])
            axs[0].set(xlabel='steps', ylabel='reward', title='Rewards')
            axs[0].legend(loc='upper left')
            axs[0].set_xlim(left=0)
            axs[0].grid()

            axs[1].set(xlabel='checkpoint', ylabel='mean reward', title='Mean reward and movements per checkpoint')
            axs[1].plot(game_rewards_mean[1:-1], label='reward', linewidth=2.0)
            for e in MOVEMENT:
                axs[1].plot(running_mean(all_events[e][1:], 2), label=e)
            axs[1].set_xlim(left=0)
            axs[1].grid()
            axs[1].legend(ncol=len(MOVEMENT), loc='upper left')

            axs[2].set(xlabel='checkpoint', ylabel='count', title='Mean reward and event counts per checkpoint')
            axs[2].plot(game_rewards_mean[1:-1], label='reward', linewidth=2.0)
            for e in EVENTS_NO_MVNT:
                axs[2].plot(running_mean(all_events[e][1:], 2), label=e)
            axs[2].set_xlim(left=0)
            axs[2].grid()
            axs[2].legend(ncol=6, loc='upper left')

            fig.savefig(f"agent_code/revised_dq_agent/eval/{world.agents[0].name}_general.png")
            axs[0].clear()
            axs[1].clear()
            axs[2].clear()
            '''

            # Same per checkpoint
            ax[0].set(xlabel='checkpoint', ylabel='steps', title="Steps survived per checkpoint")
            ax[0].plot(np.mean(np.array(all_steps_alive), axis=1),
                       label='mean steps', color='dimgrey', alpha=0.6)
            ax[0].plot(running_mean(np.mean(np.array(all_steps_alive), axis=1), 10),
                       label='running mean (10)', color='dimgrey')
            # ax[0].plot(np.array(all_ratios['crate-bomb-ratio'] * args.games), label='crate/bomb')
            ax[0].legend(loc='upper left')
            ax[0].set_xlim(0, len(all_steps_alive) - 1)
            ax[0].grid()

            ax[1].set(xlabel='checkpoint', title="Score and reward per checkpoint")
            ax[1].plot(np.mean(np.array(all_game_rewards_mean), axis=1),
                       label='rewards', color='navy', alpha=0.6)
            ax[1].plot(running_mean(np.mean(np.array(all_game_rewards_mean), axis=1), 10),
                       label='running mean (10)', color='navy')
            ax[1].set_ylabel('reward', color='navy')
            ax[1].set_xlim(0, len(all_steps_alive) - 1)
            ax[1].grid()

            ax_1_2.plot(np.mean(np.array(all_scores), axis=1), color='crimson', alpha=0.6)
            ax_1_2.set_ylabel('score', color='crimson')
            ax_1_2.plot(running_mean(np.mean(np.array(all_scores), axis=1), 10), color='crimson')
            ax_1_2.set_xlim(0, len(all_steps_alive) - 1)

            # Align the twin (score) axis' ticks with the left (reward) axis' grid
            l = ax[1].get_ylim()
            l2 = ax_1_2.get_ylim()

            def f(x):
                return l2[0] + (x - l[0]) / (l[1] - l[0]) * (l2[1] - l2[0])

            ticks = f(ax[1].get_yticks())
            ax_1_2.yaxis.set_major_locator(matplotlib.ticker.FixedLocator(ticks))
            # align.yaxes(ax[1], 0, ax_1_2, 0, 0.5)

            # Per-step shares of MOVED_LEFT, MOVED_RIGHT, MOVED_UP, MOVED_DOWN,
            # WAITED, INVALID_ACTION, BOMB_DROPPED
            y = []
            color_moves = [
                'dodgerblue', 'deepskyblue', 'limegreen', 'seagreen',
                'slategrey', 'coral', 'orangered'
            ]
            for ev in MOVEMENT:
                y.append(np.mean(np.array(all_events[ev]), axis=1)
                         / np.mean(np.array(all_steps_alive), axis=1))
            ax[2].stackplot(range(len(y[0])), *y, labels=MOVEMENT, colors=color_moves)
            ax[2].legend(bbox_to_anchor=(0.5, 1.15), loc='upper center', ncol=len(MOVEMENT))
            ax[2].grid()
            ax[2].set_xlim(0, len(all_steps_alive) - 1)
            ax[2].set_ylim(0, 1)

            '''
            ax[2].set(xlabel='checkpoint', ylabel='mean reward', title='Mean events per game')
            for e in EVENTS_NO_MVNT:
                ax[2].plot(running_mean(np.mean(np.array(all_events[e]), axis=0), 2), label=e)
            ax[2].set_xlim(left=0)
            ax[2].grid()
            ax[2].legend(ncol=7, loc='upper left')

            ax[3].set(xlabel='checkpoint', ylabel='mean reward', title='Mean movements per game')
            for e in MOVEMENT:
                ax[1].plot(running_mean(np.mean(np.array(all_events[e]), axis=0), 2), label=e)
            ax[3].set_xlim(left=0)
            ax[3].grid()
            ax[3].legend(ncol=len(MOVEMENT), loc='upper left')
            '''

            ax[3].set(xlabel='checkpoint', title="Epsilon per checkpoint", ylabel='epsilon')
            ax[3].plot(epsilons, color='cornflowerblue', linewidth=2.0)
            ax[3].set_xlim(0, len(all_steps_alive) - 1)
            ax[3].set_ylim(0, 1)
            ax[3].grid()

            fig2.savefig(f"agent_code/revised_dq_agent/eval/{world.agents[0].name}{eval_name}_checkpoint.png")
            ax[0].clear()
            ax[1].clear()
            ax[2].clear()
            ax[3].clear()
            ax_1_2.clear()
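
# Example invocation of the checkpoint-evaluation entry point above (hedged:
# the script name `eval.py` is an assumption; substitute the actual file name):
#
#   python eval.py --my-agent revised_dq_agent --eval-start 0 --eval-stop 100 --eval-step 5 --games 10
#
# The plotting code calls `running_mean`, which is defined elsewhere in this
# repository. A minimal sketch of a compatible implementation via the
# cumulative-sum trick (the original may differ, e.g. in edge handling):
import numpy as np  # already imported by this module; repeated so the sketch is self-contained


def running_mean_sketch(x, n):
    """Trailing moving average of window size n; inputs shorter than n pass through."""
    x = np.asarray(x, dtype=np.float64)
    if x.size < n:
        return x
    cumsum = np.cumsum(np.insert(x, 0, 0.0))
    return (cumsum[n:] - cumsum[:-n]) / n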
def main(argv=None):
    # Valid events
    EVENTS = [
        'MOVED_LEFT', 'MOVED_RIGHT', 'MOVED_UP', 'MOVED_DOWN', 'WAITED',
        'INVALID_ACTION', 'BOMB_DROPPED', 'BOMB_EXPLODED', 'CRATE_DESTROYED',
        'COIN_FOUND', 'COIN_COLLECTED', 'KILLED_OPPONENT', 'KILLED_SELF',
        'GOT_KILLED', 'OPPONENT_ELIMINATED', 'SURVIVED_ROUND'
    ]
    MOVEMENT = [
        'MOVED_LEFT', 'MOVED_RIGHT', 'MOVED_UP', 'MOVED_DOWN', 'WAITED',
        'INVALID_ACTION', 'BOMB_DROPPED'
    ]
    EVENTS_NO_MVNT = [
        'CRATE_DESTROYED', 'COIN_COLLECTED', 'KILLED_SELF', 'KILLED_OPPONENT',
        'OPPONENT_ELIMINATED', 'BOMB_DROPPED', 'COIN_FOUND', 'SURVIVED_ROUND'
    ]
    EVENTS_CHART = [
        'CRATE_DESTROYED', 'COIN_COLLECTED', 'KILLED_SELF', 'BOMB_DROPPED',
        'SURVIVED_ROUND', 'KILLED_OPPONENT'
    ]

    # Interesting stuff for plotting
    all_game_rewards_mean = []      # shape (#loaded checkpoints, #games per checkpoint)
    all_scores = []                 # shape (#loaded checkpoints, #games per checkpoint)
    all_scores_others = []          # shape (#loaded checkpoints, #games per checkpoint, #other agents)
    all_steps_alive = []            # shape (#loaded checkpoints, #games per checkpoint)
    all_rewards = []                # shape (#loaded checkpoints, #games per checkpoint, #steps per game)
    all_rewards_steps = []          # shape (#checkpoints * #games * #steps,)
    all_events = defaultdict(list)  # event counts, shape (#loaded checkpoints, #games per checkpoint)
    all_ratios = defaultdict(list)  # ratios computed per checkpoint, i.e. shape (#loaded checkpoints,)
    epsilons = []

    play_parser = ArgumentParser()

    # Run arguments
    agent_group = play_parser.add_mutually_exclusive_group()
    agent_group.add_argument("--my-agent", type=str,
                             help="Play agent of name ... against three rule_based_agents")
    agent_group.add_argument("--agents", type=str, nargs="+", default=[],
                             help="Explicitly set the agent names in the game")
    play_parser.add_argument("--save-steps", type=int, nargs="+",
                             default=[0] * s.MAX_AGENTS,
                             help="Explicitly set the save point for the agent")
    play_parser.add_argument("--train", default=0, type=int, choices=[0, 1, 2, 3, 4],
                             help="First … agents should be set to training mode")
    play_parser.add_argument("--continue-without-training", default=False, action="store_true")
    play_parser.add_argument("--eval-start", default=0, type=int, help="First eval step")
    play_parser.add_argument("--eval-stop", default=0, type=int, help="Last eval step")
    play_parser.add_argument("--eval-step", default=1, type=int, help="Eval step size")
    play_parser.add_argument("--games", default=10, type=int,
                             help="Number of games to evaluate per checkpoint")
    play_parser.add_argument("--name", default='', type=str, help="Name of eval plots")
    # play_parser.add_argument("--single-process", default=False, action="store_true")
    play_parser.add_argument("--conf", default='compare.json', type=str,
                             help="Compare conf JSON file")

    args = play_parser.parse_args(argv)
    args.no_gui = True
    args.make_video = False
    args.log_dir = '/tmp'

    with open(args.conf) as conf_file:
        conf = json.load(conf_file)
    global REWARDS
    REWARDS = conf['rewards']
    compare_agents = conf['agents']

    # Initialize environment and agents
    agents = [(compare_agents[0]['name'], False)]
    if args.train == 0 and not args.continue_without_training:
        args.continue_without_training = True
    # if args.my_agent:
    #     agents.append((args.my_agent, len(agents) < args.train))
    #     args.agents = ["rule_based_agent"] * (s.MAX_AGENTS - 1)
    for agent_name in args.agents:
        agents.append((agent_name, len(agents) < args.train))

    compare_name = args.name
    for agent_iter in tqdm(list(range(len(compare_agents)))):
        agents[0] = (compare_agents[agent_iter]['name'], False)
        global seed
        seed = 0
        os.environ['AGENT_CONF'] = compare_agents[agent_iter].get('conf', '')
        world = EvalWorld(args, agents)

        for a in world.agents:
            try:
                a.backend.runner.fake_self.agent.evaluate_model = True
            except Exception:
                pass

        if 'step' in compare_agents[agent_iter]:
            prev_cwd = os.getcwd()
            try:
                os.chdir(f'./agent_code/{world.agents[0].backend.code_name}/')
                world.agents[0].backend.runner.fake_self.agent.load(compare_agents[agent_iter]['step'])
            except Exception as e:
                print(f'{world.agents[0].name} does not support loading!')
                print(e)
            finally:
                os.chdir(prev_cwd)

        try:
            epsilons.append(world.agents[0].backend.runner.fake_self.agent.epsilon)
        except Exception:
            epsilons.append(0)

        score = []
        score_others = []
        event_counter = defaultdict(list)
        step_counter = []
        reward_history = []
        move_history = deque(maxlen=2)

        for round_cnt in range(args.games):
            seed = round_cnt + 1
            score.append(0)
            score_others.append([0] * (len(world.agents) - 1))
            step_counter.append(0)
            reward_history.append([0])
            for ev in EVENTS:
                event_counter[ev].append(0)

            if not world.running:
                world.ready_for_restart_flag.wait()
                world.ready_for_restart_flag.clear()
                world.new_round()

            # Main game loop
            round_finished = False
            dead = [False, False, False, False]
            while not round_finished:
                if world.running:
                    world.do_step('WAIT')
                    if not dead[0]:
                        step_counter[-1] += 1
                        for ev in world.agents[0].events:
                            if ev == 'COIN_COLLECTED':
                                score[-1] += s.REWARD_COIN
                            elif ev == 'KILLED_OPPONENT':
                                score[-1] += s.REWARD_KILL
                            event_counter[ev][-1] += 1
                    for a_i, a in enumerate(world.agents[1:]):
                        if not dead[a_i + 1]:
                            for ev in a.events:
                                if ev == 'COIN_COLLECTED':
                                    score_others[-1][a_i] += s.REWARD_COIN
                                elif ev == 'KILLED_OPPONENT':
                                    score_others[-1][a_i] += s.REWARD_KILL
                    move_history, reward = compute_reward(move_history, world.agents[0].events)
                    reward_history[-1].append(reward)
                    all_rewards_steps.append(reward)
                    for a_i, a in enumerate(world.agents):
                        dead[a_i] = a.dead
                else:
                    round_finished = True

        world.end()

        # General plotting values
        all_scores.append(score)
        all_scores_others.append(score_others)
        all_steps_alive.append(step_counter)
        for ev in EVENTS:
            all_events[ev].append(event_counter[ev])
        try:
            crate_bomb_ratio = sum(event_counter["CRATE_DESTROYED"]) / sum(event_counter["BOMB_DROPPED"])
        except ZeroDivisionError:
            crate_bomb_ratio = 0
        all_ratios['crate-bomb-ratio'].append(crate_bomb_ratio)

        game_rewards_mean = [np.mean(x) for x in reward_history]
        all_rewards.append(reward_history)
        reward_colors = ['cornflowerblue', 'midnightblue', 'crimson']
        all_game_rewards_mean.append([np.mean(x) for x in reward_history])

        #############################
        #######     plots     #######
        #############################
        fig, ax = plt.subplots(nrows=(4 + len(EVENTS_CHART)) // 2 + 1, ncols=2, figsize=(16, 20))
        fig.tight_layout(pad=8)
        ax = ax.flatten()
        agent_names = [x.get('alias', x['name']) for x in conf['agents']][:len(all_steps_alive)]

        ax[0].set(xlabel='agent name', ylabel='steps', title="Survival comparison")
        ax[0].bar(agent_names, np.mean(np.array(all_steps_alive), axis=1),
                  label='mean steps', color='dimgrey')
        for label in ax[0].get_xticklabels():
            label.set_rotation(30)
            label.set_ha('right')

        ax[1].set(xlabel='agent name', title="Score comparison")
        ax[1].bar(agent_names, np.mean(np.array(all_scores), axis=1),
                  label='mean score', color='crimson')
        ax[1].set_ylabel('score')
        for label in ax[1].get_xticklabels():
            label.set_rotation(30)
            label.set_ha('right')

        ax[2].set(xlabel='agent name', title="Reward comparison")
        ax[2].bar(agent_names, np.mean(np.array(all_game_rewards_mean), axis=1),
                  label='mean reward', color='navy')
        ax[2].set_ylabel('reward')
        for label in ax[2].get_xticklabels():
            label.set_rotation(30)
            label.set_ha('right')

        # One bar color per entry of EVENTS_CHART
        color_moves = [
            'dodgerblue', 'deepskyblue', 'limegreen', 'slategrey',
            'seagreen', 'coral', 'orangered'
        ]
        for i, ev in enumerate(EVENTS_CHART):
            ax[3 + i].set(xlabel='agent name', title=f"{ev} comparison")
            ax[3 + i].bar(agent_names, np.mean(np.array(all_events[ev]), axis=1),
                          label=f"{ev} mean", color=color_moves[i])
            ax[3 + i].set_ylabel(f"{ev}")
            for label in ax[3 + i].get_xticklabels():
                label.set_rotation(30)
                label.set_ha('right')

        # Stacked bars: share of the total score going to our agent vs. the enemies
        score_sums = np.sum(np.array(all_scores_others), axis=1).T
        score_sums = np.vstack((np.sum(np.array(all_scores), axis=1), score_sums)).astype(np.float32)
        total_scores = np.sum(score_sums, axis=0).astype(np.float32)
        colors = ['#0197F6', 'dimgrey', 'darkgray', 'gainsboro']
        labels = ['agent', 'enemy #1', 'enemy #2', 'enemy #3']
        previous = np.zeros_like(score_sums[0])
        for i in range(len(score_sums)):
            score_sums[i] = np.true_divide(score_sums[i], total_scores)
            ax[3 + len(EVENTS_CHART)].bar(agent_names, score_sums[i], bottom=previous,
                                          color=colors[i], label=labels[i])
            previous += score_sums[i]
        ax[3 + len(EVENTS_CHART)].set(xlabel='agent name', title='Score distribution across all games')
        ax[3 + len(EVENTS_CHART)].set_ylabel('score distribution')
        ax[3 + len(EVENTS_CHART)].set_ylim(0, 1)
        ax[3 + len(EVENTS_CHART)].legend(bbox_to_anchor=(1, 1), handlelength=0.8)
        for label in ax[3 + len(EVENTS_CHART)].get_xticklabels():
            label.set_rotation(30)
            label.set_ha('right')

        # Stacked bars: how rounds ended (survived / killed self / killed by enemy)
        got_killed = np.mean(np.array(all_events['GOT_KILLED']), axis=1)
        killed_self = np.mean(np.array(all_events['KILLED_SELF']), axis=1)
        got_killed -= killed_self
        survived_round = np.mean(np.array(all_events['SURVIVED_ROUND']), axis=1)
        survival = np.vstack((survived_round, killed_self, got_killed)).astype(np.float32)
        colors = ['springgreen', 'salmon', 'red']
        labels = ['survived', 'killed self', 'killed by enemy']
        previous = np.zeros_like(survived_round)
        for i in range(len(survival)):
            ax[4 + len(EVENTS_CHART)].bar(agent_names, survival[i], bottom=previous,
                                          color=colors[i], label=labels[i])
            previous += survival[i]
        ax[4 + len(EVENTS_CHART)].set(xlabel='agent name', title='Survival distribution across all games')
        ax[4 + len(EVENTS_CHART)].set_ylabel('survival distribution')
        ax[4 + len(EVENTS_CHART)].set_ylim(0, 1)
        ax[4 + len(EVENTS_CHART)].legend(bbox_to_anchor=(1, 1), handlelength=0.8)
        for label in ax[4 + len(EVENTS_CHART)].get_xticklabels():
            label.set_rotation(30)
            label.set_ha('right')

        ax[5 + len(EVENTS_CHART)].set_visible(False)
        fig.savefig(f"eval/{compare_name}_comparison.png", dpi=300)
def main(argv=None):
    play_parser = ArgumentParser()

    # Run arguments
    agent_group = play_parser.add_mutually_exclusive_group()
    agent_group.add_argument("--my-agent", type=str,
                             help="Play agent of name ... against three rule_based_agents")
    agent_group.add_argument("--agents", type=str, nargs="+",
                             default=["rule_based_agent"] * s.MAX_AGENTS,
                             help="Explicitly set the agent names in the game")
    play_parser.add_argument("--train", default=0, type=int, choices=[0, 1, 2, 3, 4],
                             help="First … agents should be set to training mode")
    play_parser.add_argument("--continue-without-training", default=False, action="store_true")
    # play_parser.add_argument("--single-process", default=False, action="store_true")
    play_parser.add_argument("--n-rounds", type=int, default=-1, help="How many rounds to play")
    play_parser.add_argument("--n-steps", type=int, default=-1, help="How many steps to play")
    play_parser.add_argument("--reload-steps", type=int, default=10000,
                             help="How many steps until reload")

    args = play_parser.parse_args(argv)
    args.no_gui = True
    args.make_video = False
    args.log_dir = '/tmp'

    # Initialize environment and agents
    agents = []
    if args.train == 0 and not args.continue_without_training:
        args.continue_without_training = True
    if args.my_agent:
        agents.append((args.my_agent, len(agents) < args.train))
        args.agents = ["rule_based_agent"] * (s.MAX_AGENTS - 1)
    for agent_name in args.agents:
        agents.append((agent_name, len(agents) < args.train))

    world = TrainWorld(args, agents)

    step_counter = 0
    round_counter = 0
    prev_load_counter = 0
    tqdm_iter_count = args.n_rounds if args.n_rounds != -1 else args.n_steps
    pbar = tqdm(total=tqdm_iter_count)

    # Run one or more games
    done = False
    while not done:
        if prev_load_counter + args.reload_steps <= step_counter:
            prev_load_counter = step_counter
            print('trying to update agents')
            # Grab the cwd before the try block so the finally clause cannot
            # hit an unbound name if the attribute access below raises
            prev_cwd = os.getcwd()
            try:
                save_step = world.agents[0].backend.runner.fake_self.agent.save_step - 1
                if save_step >= 0:
                    os.chdir(f'./agent_code/{world.agents[0].backend.code_name}/')
                    for a in world.agents[1:]:
                        try:
                            a.backend.runner.fake_self.agent.load(save_step)
                            print(f'reloaded agent {a.name} for step {save_step}')
                        except Exception as e:
                            print(f'{a.name} does not support loading!')
                            print(e)
            except Exception as e:
                print('first agent is not one of us!')
                print(e)
            finally:
                os.chdir(prev_cwd)

        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # Main game loop
        round_finished = False
        while not round_finished:
            if world.running:
                if step_counter >= args.n_steps and args.n_steps != -1:
                    world.end_round()
                world.do_step('WAIT')
                step_counter += 1
                if args.n_rounds == -1:
                    pbar.update(1)
            else:
                round_finished = True
                round_counter += 1
                if args.n_steps == -1:
                    pbar.update(1)

        if step_counter >= args.n_steps and args.n_steps != -1:
            done = True
        if round_counter >= args.n_rounds and args.n_rounds != -1:
            done = True

    world.end()
    print(f'steps trained: {step_counter}')
    print(f'rounds trained: {round_counter}')
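
# Example invocations of the training entry point above (hedged: the script
# name `train.py` is an assumption; substitute the actual file name):
#
#   python train.py --my-agent revised_dq_agent --train 1 --n-steps 1000000
#   python train.py --my-agent revised_dq_agent --train 1 --n-rounds 5000
#
# Give exactly one of --n-rounds / --n-steps; the other stays at its -1
# default (unbounded), and the progress bar counts whichever unit bounds the
# run. Every --reload-steps steps the script tries to load the training
# agent's latest finished checkpoint into the opponents (opponents that do
# not support loading are skipped), which amounts to self-play against a
# slightly older copy of the agent.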