Example #1
def main(argv = None):
    parser = ArgumentParser()

    subparsers = parser.add_subparsers(dest='command_name', required=True)

    # Run arguments
    play_parser = subparsers.add_parser("play")
    agent_group = play_parser.add_mutually_exclusive_group()
    agent_group.add_argument("--my-agent", type=str, help="Play agent of name ... against three rule_based_agents")
    agent_group.add_argument("--agents", type=str, nargs="+", default=["rule_based_agent"] * s.MAX_AGENTS, help="Explicitly set the agent names in the game")
    play_parser.add_argument("--train", default=0, type=int, choices=[0, 1, 2, 3, 4],
                             help="First … agents should be set to training mode")
    play_parser.add_argument("--continue-without-training", default=False, action="store_true")
    # play_parser.add_argument("--single-process", default=False, action="store_true")

    play_parser.add_argument("--n-rounds", type=int, default=10, help="How many rounds to play")
    play_parser.add_argument("--save-replay", const=True, default=False, action='store', nargs='?', help='Store the game as .pt for a replay')
    play_parser.add_argument("--no-gui", default=False, action="store_true", help="Deactivate the user interface and play as fast as possible.")

    # Replay arguments
    replay_parser = subparsers.add_parser("replay")
    replay_parser.add_argument("replay", help="File to load replay from")

    # Interaction
    for sub in [play_parser, replay_parser]:
        sub.add_argument("--fps", type=int, default=15, help="FPS of the GUI (does not change game)")
        sub.add_argument("--turn-based", default=False, action="store_true",
                         help="Wait for key press until next movement")
        sub.add_argument("--update-interval", type=float, default=0.1,
                         help="How often agents take steps (ignored without GUI)")
        sub.add_argument("--log_dir", type=str, default=os.path.dirname(os.path.abspath(__file__)) + "/logs")

        # Video?
        sub.add_argument("--make-video", default=False, action="store_true",
                         help="Make a video from the game")

    args = parser.parse_args(argv)
    if args.command_name == "replay":
        args.no_gui = False
        args.n_rounds = 1

    has_gui = not args.no_gui
    if has_gui:
        if not LOADED_PYGAME:
            raise ValueError("pygame could not loaded, cannot run with GUI")
        pygame.init()

    # Initialize environment and agents
    if args.command_name == "play":
        agents = []
        if args.train == 0 and not args.continue_without_training:
            args.continue_without_training = True
        if args.my_agent:
            agents.append((args.my_agent, len(agents) < args.train))
            args.agents = ["rule_based_agent"] * (s.MAX_AGENTS - 1)
        for agent_name in args.agents:
            agents.append((agent_name, len(agents) < args.train))

        world = BombeRLeWorld(args, agents)
    elif args.command_name == "replay":
        world = ReplayWorld(args)
    else:
        raise ValueError(f"Unknown command {args.command_name}")

    # Emulate Windows process spawning behaviour under Unix (for testing)
    # mp.set_start_method('spawn')

    user_inputs = []

    # Start game logic thread
    t = threading.Thread(target=game_logic, args=(world, user_inputs, args), name="Game Logic")
    t.daemon = True
    t.start()

    # Run one or more games
    for _ in tqdm(range(args.n_rounds)):
        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # First render
        if has_gui:
            world.render()
            pygame.display.flip()

        round_finished = False
        last_frame = time()
        user_inputs.clear()

        # Main game loop
        while not round_finished:
            if has_gui:
                # Grab GUI events
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        if world.running:
                            world.end_round()
                        world.end()
                        return
                    elif event.type == pygame.KEYDOWN:
                        key_pressed = event.key
                        if key_pressed in (pygame.K_q, pygame.K_ESCAPE):
                            world.end_round()
                        if not world.running:
                            round_finished = True
                        # Convert keyboard input into actions
                        action = s.INPUT_MAP.get(key_pressed)
                        if action:
                            if args.turn_based:
                                user_inputs.clear()
                            user_inputs.append(action)

                # Render only once in a while
                if time() - last_frame >= 1 / args.fps:
                    world.render()
                    pygame.display.flip()
                    last_frame = time()
                else:
                    sleep_time = 1 / args.fps - (time() - last_frame)
                    if sleep_time > 0:
                        sleep(sleep_time)
            elif not world.running:
                round_finished = True
            else:
                # Non-GUI mode: poll every millisecond for the round to end
                sleep(0.001)

    world.end()
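
Because main() takes an optional argv list, the launcher can be driven from Python as well as from the shell. A minimal sketch, assuming the function lives in a module named main.py; the agent name and replay path below are illustrative:

# Hypothetical programmatic invocation; module, agent, and file names are assumptions.
from main import main

# Headless batch: 100 rounds, first agent in training mode, no GUI.
main(["play", "--my-agent", "my_agent", "--train", "1",
      "--n-rounds", "100", "--no-gui"])

# Watch a previously saved game at 30 FPS.
main(["replay", "replays/last_game.pt", "--fps", "30"])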
Example #2
def main(argv=None):

    # valid events
    EVENTS = [
        'MOVED_LEFT', 'MOVED_RIGHT', 'MOVED_UP', 'MOVED_DOWN', 'WAITED',
        'INVALID_ACTION', 'BOMB_DROPPED', 'BOMB_EXPLODED', 'CRATE_DESTROYED',
        'COIN_FOUND', 'COIN_COLLECTED', 'KILLED_OPPONENT', 'KILLED_SELF',
        'GOT_KILLED', 'OPPONENT_ELIMINATED', 'SURVIVED_ROUND'
    ]
    MOVEMENT = [
        'MOVED_LEFT', 'MOVED_RIGHT', 'MOVED_UP', 'MOVED_DOWN', 'WAITED',
        'INVALID_ACTION', 'BOMB_DROPPED'
    ]
    EVENTS_NO_MVNT = [
        'CRATE_DESTROYED', 'COIN_COLLECTED', 'KILLED_SELF', 'KILLED_OPPONENT',
        'OPPONENT_ELIMINATED', 'BOMB_DROPPED', 'COIN_FOUND', 'SURVIVED_ROUND'
    ]

    # quantities collected for plotting
    all_game_rewards_mean = [
    ]  # has shape (#loaded checkpoints, #games per checkpoint)
    all_scores = []  # has shape (#loaded checkpoints, #games per checkpoint)
    all_steps_alive = [
    ]  # has shape (#loaded checkpoints, #games per checkpoint)
    all_rewards = [
    ]  # has shape (#loaded checkpoints, #games per checkpoint, #steps per game)
    all_rewards_steps = []  # has shape (#checkpoints*games*steps)

    all_events = defaultdict(
        list
    )  # dict containing event counts in shape (#loaded checkpoints, #games per checkpoint)
    all_ratios = defaultdict(
        list
    )  # dict containing ratios computed per checkpoint, i.e. (#loaded checkpoints, )

    epsilons = []

    play_parser = ArgumentParser()

    # Run arguments
    agent_group = play_parser.add_mutually_exclusive_group()
    agent_group.add_argument(
        "--my-agent",
        type=str,
        help="Play agent of name ... against three rule_based_agents")
    agent_group.add_argument("--agents",
                             type=str,
                             nargs="+",
                             default=["rule_based_agent"] * s.MAX_AGENTS,
                             help="Explicitly set the agent names in the game")
    play_parser.add_argument(
        "--save-steps",
        type=int,
        nargs="+",
        default=[0] * s.MAX_AGENTS,
        help="Explicitly set the save point for the agent")
    play_parser.add_argument(
        "--train",
        default=0,
        type=int,
        choices=[0, 1, 2, 3, 4],
        help="First … agents should be set to training mode")
    play_parser.add_argument("--continue-without-training",
                             default=False,
                             action="store_true")
    play_parser.add_argument("--eval-start",
                             default=0,
                             type=int,
                             help="first eval step")
    play_parser.add_argument("--eval-stop",
                             default=0,
                             type=int,
                             help="last eval step")
    play_parser.add_argument("--eval-step",
                             default=1,
                             type=int,
                             help="eval step")
    play_parser.add_argument("--games",
                             default=10,
                             type=int,
                             help="number of games to evaluate per checkpoint")
    play_parser.add_argument("--name",
                             default='',
                             type=str,
                             help="name of eval plots")
    # play_parser.add_argument("--single-process", default=False, action="store_true")

    args = play_parser.parse_args(argv)
    args.no_gui = True
    args.make_video = False
    args.log_dir = '/tmp'

    # Initialize environment and agents
    agents = []
    if args.train == 0 and not args.continue_without_training:
        args.continue_without_training = True
    if args.my_agent:
        agents.append((args.my_agent, len(agents) < args.train))
        args.agents = ["rule_based_agent"] * (s.MAX_AGENTS - 1)
    for agent_name in args.agents:
        agents.append((agent_name, len(agents) < args.train))

    fig, axs = plt.subplots(4, figsize=(15, 15))
    fig.tight_layout(pad=5)

    fig2, ax = plt.subplots(4, figsize=(15, 15))
    fig2.tight_layout(pad=6)

    ax_1_2 = ax[1].twinx()

    eval_name = ''

    for save_step_iter in tqdm(
            list(range(args.eval_start, args.eval_stop + 1, args.eval_step))):
        global seed
        seed = 0

        world = EvalWorld(args, agents)

        for i, a in enumerate(world.agents):
            args.save_steps[i] = save_step_iter
            if args.save_steps[i] >= 0:
                prev_cwd = os.getcwd()
                try:
                    if a.backend.runner.fake_self.agent.save_step < save_step_iter:
                        print("last checkpoint reached -> done")
                        return
                    os.chdir(
                        f'./agent_code/{world.agents[0].backend.code_name}/')
                    a.backend.runner.fake_self.agent.load(args.save_steps[i])
                    a.backend.runner.fake_self.agent.evaluate_model = True
                except Exception as e:
                    print(f'{a.name} does not support loading!')
                    print(e)
                finally:
                    os.chdir(prev_cwd)

        try:
            if not args.name and not eval_name:
                eval_name = '_' + world.agents[
                    0].backend.runner.fake_self.agent.checkpoint
                print(f'using the name {eval_name[1:]}')
            elif not eval_name:
                eval_name = '_' + args.name
                print(f'using the name {eval_name[1:]}')
            epsilons.append(
                world.agents[0].backend.runner.fake_self.agent.epsilon)
        except Exception:
            # The agent exposes no epsilon attribute; record a placeholder.
            epsilons.append(0)

        score = []

        event_counter = defaultdict(list)
        step_counter = []
        reward_history = []
        move_history = deque(maxlen=2)

        for round_cnt in range(args.games):
            seed = round_cnt + 1

            score.append(0)
            step_counter.append(0)
            reward_history.append([0])
            for ev in EVENTS:
                event_counter[ev].append(0)

            if not world.running:
                world.ready_for_restart_flag.wait()
                world.ready_for_restart_flag.clear()
                world.new_round()

            # Main game loop
            round_finished = False
            dead = False
            while not round_finished:
                if world.running:
                    world.do_step('WAIT')
                    if not dead:
                        step_counter[-1] += 1
                        for ev in world.agents[0].events:
                            if ev == 'COIN_COLLECTED':
                                score[-1] += s.REWARD_COIN
                            elif ev == 'KILLED_OPPONENT':
                                score[-1] += s.REWARD_KILL
                            event_counter[ev][-1] += 1
                        move_history, reward = compute_reward(
                            move_history, world.agents[0].events)
                        reward_history[-1].append(reward)
                        all_rewards_steps.append(reward)
                    dead = world.agents[0].dead
                    # if not world.agents[0].dead:
                    #    print(1, world.agents[0].events)
                    # if not world.agents[1].dead:
                    #    print(2, world.agents[1].events)
                    # if not world.agents[2].dead:
                    #    print(3, world.agents[2].events)
                    # if not world.agents[3].dead:
                    #    print(4, world.agents[3].events)
                else:
                    round_finished = True

            world.end()

        # general plotting values

        #print(f'score: {sum(score)}')
        all_scores.append(score)
        #print(f'steps alive: {sum(step_counter)}')
        all_steps_alive.append(step_counter)
        for ev in EVENTS:
            #print(f'{ev}: {sum(event_counter[ev])}')
            all_events[ev].append(event_counter[ev])

        try:
            crate_bomb_ratio = sum(event_counter["CRATE_DESTROYED"]) / sum(
                event_counter["BOMB_DROPPED"])
        except ZeroDivisionError:
            crate_bomb_ratio = 0

        all_ratios['crate-bomb-ratio'].append(crate_bomb_ratio)
        #print(f'crate-bomb-ratio: {round(crate_bomb_ratio, 2)}')

        game_rewards_mean = [np.mean(x) for x in reward_history]
        all_rewards.append(reward_history)

        reward_colors = ['cornflowerblue', 'midnightblue', 'crimson']
        all_game_rewards_mean.append(game_rewards_mean)

        if len(all_steps_alive) > 1:
            #############################
            #######     plots   #########
            #############################
            '''
            for i, n in enumerate([1, 5, 50]):
                if i == 0:
                    axs[0].plot(all_rewards_steps, label=f'reward per step', color=reward_colors[i])
                else:
                    axs[0].plot(running_mean(all_rewards_steps, n), label=f'running mean: {n}', color=reward_colors[i])


            axs[0].set(xlabel='steps', ylabel='reward', title='Rewards')
            axs[0].legend(loc='upper left')
            axs[0].set_xlim(left=0)
            axs[0].grid()

            axs[1].set(xlabel='checkpoint', ylabel='mean reward', title='Mean reward and movements per checkpoint')
            axs[1].plot(game_rewards_mean[1:-1], label='reward', linewidth=2.0)
            for e in MOVEMENT:   
                axs[1].plot(running_mean(all_events[e][1:], 2), label=e)
            axs[1].set_xlim(left=0)
            axs[1].grid()
            axs[1].legend(ncol=len(MOVEMENT), loc='upper left')

            axs[2].set(xlabel='checkpoint', ylabel='count', title='Mean reward and event counts per checkpoint')
            axs[2].plot(game_rewards_mean[1:-1], label='reward', linewidth=2.0)
            for e in EVENTS_NO_MVNT:
                axs[2].plot(running_mean(all_events[e][1:], 2), label=e)
            axs[2].set_xlim(left=0)
            axs[2].grid()
            axs[2].legend(ncol=6, loc='upper left')
            fig.savefig(f"agent_code/revised_dq_agent/eval/{world.agents[0].name}_general.png")
            axs[0].clear()
            axs[1].clear()
            axs[2].clear()
            '''

            # same per checkpoint

            ax[0].set(xlabel='checkpoint',
                      ylabel='steps',
                      title="Steps survived per checkpoint")
            ax[0].plot(np.mean(np.array(all_steps_alive), axis=1),
                       label='mean steps',
                       color='dimgrey',
                       alpha=0.6)
            ax[0].plot(running_mean(np.mean(np.array(all_steps_alive), axis=1),
                                    10),
                       label='running mean (10)',
                       color='dimgrey')
            #ax[0].plot(np.array(all_ratios['crate-bomb-ratio']*args.games), label='crate/bomb')
            # print(np.array(all_ratios['crate-bomb-ratio']))
            ax[0].legend(loc='upper left')
            ax[0].set_xlim(0, len(all_steps_alive) - 1)
            ax[0].grid()

            ax[1].set(xlabel='checkpoint',
                      title="Score and reward per checkpoint")
            ax[1].plot(np.mean(np.array(all_game_rewards_mean), axis=1),
                       label='rewards',
                       color='navy',
                       alpha=0.6)
            ax[1].plot(running_mean(
                np.mean(np.array(all_game_rewards_mean), axis=1), 10),
                       label='running mean (10)',
                       color='navy')
            ax[1].set_ylabel('reward', color='navy')
            ax[1].set_xlim(0, len(all_steps_alive) - 1)
            ax[1].grid()
            ax_1_2.plot(np.mean(np.array(all_scores), axis=1),
                        color='crimson',
                        alpha=0.6)
            ax_1_2.set_ylabel('score', color='crimson')
            ax_1_2.plot(running_mean(np.mean(np.array(all_scores), axis=1),
                                     10),
                        color='crimson')
            ax_1_2.set_xlim(0, len(all_steps_alive) - 1)

            # Align the right (score) axis with the left (reward) gridlines:
            # map the left axis' tick positions linearly into the right
            # axis' data range and pin them there.
            reward_lim = ax[1].get_ylim()
            score_lim = ax_1_2.get_ylim()

            def to_score_scale(x):
                return score_lim[0] + (x - reward_lim[0]) / (
                    reward_lim[1] - reward_lim[0]) * (score_lim[1] - score_lim[0])

            ticks = to_score_scale(ax[1].get_yticks())
            ax_1_2.yaxis.set_major_locator(
                matplotlib.ticker.FixedLocator(ticks))

            #align.yaxes(ax[1], 0, ax_1_2, 0, 0.5)

            y = []
            #['MOVED_LEFT', 'MOVED_RIGHT', 'MOVED_UP', 'MOVED_DOWN', 'WAITED','INVALID_ACTION', 'BOMB_DROPPED']
            color_moves = [
                'dodgerblue', 'deepskyblue', 'limegreen', 'seagreen',
                'slategrey', 'coral', 'orangered'
            ]
            for ev in MOVEMENT:
                y.append(
                    np.mean(np.array(all_events[ev]), axis=1) /
                    np.mean(np.array(all_steps_alive), axis=1))
            ax[2].stackplot(range(len(y[0])),
                            *y,
                            labels=MOVEMENT,
                            colors=color_moves)
            ax[2].legend(bbox_to_anchor=(0.5, 1.15),
                         loc='upper center',
                         ncol=len(MOVEMENT))
            ax[2].grid()
            ax[2].set_xlim(0, len(all_steps_alive) - 1)
            ax[2].set_ylim(0, 1)
            '''

            ax[2].set(xlabel='checkpoint', ylabel='mean reward', title='Mean events per game')
            for e in EVENTS_NO_MVNT:   
                ax[2].plot(running_mean(np.mean(np.array(all_events[e]), axis=0),2), label=e)
            ax[2].set_xlim(left=0)
            ax[2].grid()
            ax[2].legend(ncol=7, loc='upper left')

            ax[3].set(xlabel='checkpoint', ylabel='mean reward', title='Mean movements per game')
            for e in MOVEMENT:  
                ax[1].plot(running_mean(np.mean(np.array(all_events[e]), axis=0), 2), label=e)
            ax[3].set_xlim(left=0)
            ax[3].grid()
            ax[3].legend(ncol=len(MOVEMENT), loc='upper left')
            '''

            ax[3].set(xlabel='checkpoint',
                      title="Epsilon per checkpoint",
                      ylabel='epsilon')
            ax[3].plot(epsilons, color='cornflowerblue', linewidth=2.0)
            ax[3].set_xlim(0, len(all_steps_alive) - 1)
            ax[3].set_ylim(0, 1)
            ax[3].grid()

            fig2.savefig(
                f"agent_code/revised_dq_agent/eval/{world.agents[0].name}{eval_name}_checkpoint.png"
            )
            ax[0].clear()
            ax[1].clear()
            ax[2].clear()
            ax[3].clear()
            ax_1_2.clear()
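
The plotting above leans on a running_mean helper that is not shown in this snippet, so its exact definition is unknown. A minimal sketch that fits the call sites here (1-D data in, window size n, smoothed array out), assuming a plain moving average:

import numpy as np

def running_mean(values, n):
    # A sketch, not the project's original helper: simple moving average.
    values = np.asarray(values, dtype=np.float64)
    if len(values) < n:
        return values  # too short to smooth; pass through unchanged
    # Cumulative-sum trick: output[i] is the mean of values[i:i + n].
    cumsum = np.cumsum(np.insert(values, 0, 0.0))
    return (cumsum[n:] - cumsum[:-n]) / n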
Example #3
def main(argv=None):

    # valid events
    EVENTS = [
        'MOVED_LEFT', 'MOVED_RIGHT', 'MOVED_UP', 'MOVED_DOWN', 'WAITED',
        'INVALID_ACTION', 'BOMB_DROPPED', 'BOMB_EXPLODED', 'CRATE_DESTROYED',
        'COIN_FOUND', 'COIN_COLLECTED', 'KILLED_OPPONENT', 'KILLED_SELF',
        'GOT_KILLED', 'OPPONENT_ELIMINATED', 'SURVIVED_ROUND'
    ]
    MOVEMENT = [
        'MOVED_LEFT', 'MOVED_RIGHT', 'MOVED_UP', 'MOVED_DOWN', 'WAITED',
        'INVALID_ACTION', 'BOMB_DROPPED'
    ]
    EVENTS_NO_MVNT = [
        'CRATE_DESTROYED', 'COIN_COLLECTED', 'KILLED_SELF', 'KILLED_OPPONENT',
        'OPPONENT_ELIMINATED', 'BOMB_DROPPED', 'COIN_FOUND', 'SURVIVED_ROUND'
    ]

    EVENTS_CHART = [
        'CRATE_DESTROYED', 'COIN_COLLECTED', 'KILLED_SELF', 'BOMB_DROPPED',
        'SURVIVED_ROUND', 'KILLED_OPPONENT'
    ]

    # quantities collected for plotting
    all_game_rewards_mean = [
    ]  # has shape (#loaded checkpoints, #games per checkpoint)
    all_scores = []  # has shape (#loaded checkpoints, #games per checkpoint)
    all_scores_others = [
    ]  # has shape (#loaded checkpoints, #games per checkpoint, #other agents)
    all_steps_alive = [
    ]  # has shape (#loaded checkpoints, #games per checkpoint)
    all_rewards = [
    ]  # has shape (#loaded checkpoints, #games per checkpoint, #steps per game)
    all_rewards_steps = []  # has shape (#checkpoints*games*steps)

    all_events = defaultdict(
        list
    )  # dict containing event counts in shape (#loaded checkpoints, #games per checkpoint)
    all_ratios = defaultdict(
        list
    )  # dict containing ratios computed per checkpoint, i.e. (#loaded checkpoints, )

    epsilons = []

    play_parser = ArgumentParser()

    # Run arguments
    agent_group = play_parser.add_mutually_exclusive_group()
    agent_group.add_argument(
        "--my-agent",
        type=str,
        help="Play agent of name ... against three rule_based_agents")
    agent_group.add_argument("--agents",
                             type=str,
                             nargs="+",
                             default=[],
                             help="Explicitly set the agent names in the game")
    play_parser.add_argument(
        "--save-steps",
        type=int,
        nargs="+",
        default=[0] * s.MAX_AGENTS,
        help="Explicitly set the save point for the agent")
    play_parser.add_argument(
        "--train",
        default=0,
        type=int,
        choices=[0, 1, 2, 3, 4],
        help="First … agents should be set to training mode")
    play_parser.add_argument("--continue-without-training",
                             default=False,
                             action="store_true")
    play_parser.add_argument("--eval-start",
                             default=0,
                             type=int,
                             help="first eval step")
    play_parser.add_argument("--eval-stop",
                             default=0,
                             type=int,
                             help="last eval step")
    play_parser.add_argument("--eval-step",
                             default=1,
                             type=int,
                             help="eval step")
    play_parser.add_argument("--games",
                             default=10,
                             type=int,
                             help="number of games to evaluate per checkpoint")
    play_parser.add_argument("--name",
                             default='',
                             type=str,
                             help="name of eval plots")
    # play_parser.add_argument("--single-process", default=False, action="store_true")

    play_parser.add_argument("--conf",
                             default='compare.json',
                             type=str,
                             help="compare conf json file")

    args = play_parser.parse_args(argv)
    args.no_gui = True
    args.make_video = False
    args.log_dir = '/tmp'

    with open(args.conf, 'r') as conf_file:
        conf = json.load(conf_file)
    global REWARDS
    REWARDS = conf['rewards']

    compare_agents = conf['agents']

    # Initialize environment and agents
    agents = [(compare_agents[0]['name'], False)]
    if args.train == 0 and not args.continue_without_training:
        args.continue_without_training = True
    # if args.my_agent:
    #    agents.append((args.my_agent, len(agents) < args.train))
    #    args.agents = ["rule_based_agent"] * (s.MAX_AGENTS - 1)
    for agent_name in args.agents:
        agents.append((agent_name, len(agents) < args.train))

    compare_name = args.name

    for agent_iter in tqdm(list(range(len(compare_agents)))):
        agents[0] = (compare_agents[agent_iter]['name'], False)
        global seed
        seed = 0

        os.environ['AGENT_CONF'] = compare_agents[agent_iter].get('conf', '')
        world = EvalWorld(args, agents)

        for a in world.agents:
            try:
                a.backend.runner.fake_self.agent.evaluate_model = True
            except Exception:
                # Agents without a learnable model lack this attribute.
                pass

        if 'step' in compare_agents[agent_iter]:
            prev_cwd = os.getcwd()
            try:
                os.chdir(f'./agent_code/{world.agents[0].backend.code_name}/')
                world.agents[0].backend.runner.fake_self.agent.load(
                    compare_agents[agent_iter]['step'])
            except Exception as e:
                print(f'{world.agents[0].name} does not support loading!')
                print(e)
            finally:
                os.chdir(prev_cwd)

        try:
            epsilons.append(
                world.agents[0].backend.runner.fake_self.agent.epsilon)
        except Exception:
            # The agent exposes no epsilon attribute; record a placeholder.
            epsilons.append(0)

        score = []
        score_others = []

        event_counter = defaultdict(list)
        step_counter = []
        reward_history = []
        move_history = deque(maxlen=2)

        for round_cnt in range(args.games):
            seed = round_cnt + 1

            score.append(0)
            score_others.append([0] * (len(world.agents) - 1))
            step_counter.append(0)
            reward_history.append([0])
            for ev in EVENTS:
                event_counter[ev].append(0)

            if not world.running:
                world.ready_for_restart_flag.wait()
                world.ready_for_restart_flag.clear()
                world.new_round()

            # Main game loop
            round_finished = False
            dead = [False, False, False, False]
            while not round_finished:
                if world.running:
                    world.do_step('WAIT')
                    if not dead[0]:
                        step_counter[-1] += 1
                        for ev in world.agents[0].events:
                            if ev == 'COIN_COLLECTED':
                                score[-1] += s.REWARD_COIN
                            elif ev == 'KILLED_OPPONENT':
                                score[-1] += s.REWARD_KILL
                            event_counter[ev][-1] += 1
                        for a_i, a in enumerate(world.agents[1:]):
                            if not dead[a_i + 1]:
                                for ev in a.events:
                                    if ev == 'COIN_COLLECTED':
                                        score_others[-1][a_i] += s.REWARD_COIN
                                    elif ev == 'KILLED_OPPONENT':
                                        score_others[-1][a_i] += s.REWARD_KILL
                        move_history, reward = compute_reward(
                            move_history, world.agents[0].events)
                        reward_history[-1].append(reward)
                        all_rewards_steps.append(reward)
                    for a_i, a in enumerate(world.agents):
                        dead[a_i] = a.dead
                    # if not world.agents[0].dead:
                    #    print(1, world.agents[0].events)
                    # if not world.agents[1].dead:
                    #    print(2, world.agents[1].events)
                    # if not world.agents[2].dead:
                    #    print(3, world.agents[2].events)
                    # if not world.agents[3].dead:
                    #    print(4, world.agents[3].events)
                else:
                    round_finished = True

            world.end()

        # general plotting values

        #print(f'score: {sum(score)}')
        all_scores.append(score)
        all_scores_others.append(score_others)
        #print(f'steps alive: {sum(step_counter)}')
        all_steps_alive.append(step_counter)
        for ev in EVENTS:
            #print(f'{ev}: {sum(event_counter[ev])}')
            all_events[ev].append(event_counter[ev])

        try:
            crate_bomb_ratio = sum(event_counter["CRATE_DESTROYED"]) / sum(
                event_counter["BOMB_DROPPED"])
        except ZeroDivisionError:
            # Mirror the guard from the per-checkpoint evaluation script:
            # an agent that never drops a bomb gets a ratio of 0.
            crate_bomb_ratio = 0
        all_ratios['crate-bomb-ratio'].append(crate_bomb_ratio)
        #print(f'crate-bomb-ratio: {round(crate_bomb_ratio, 2)}')

        game_rewards_mean = [np.mean(x) for x in reward_history]
        all_rewards.append(reward_history)

        reward_colors = ['cornflowerblue', 'midnightblue', 'crimson']
        all_game_rewards_mean.append(game_rewards_mean)

    #############################
    #######     plots   #########
    #############################

    fig, ax = plt.subplots(nrows=(4 + len(EVENTS_CHART)) // 2 + 1,
                           ncols=2,
                           figsize=(16, 20))
    fig.tight_layout(pad=8)

    ax = ax.flatten()

    agent_names = [x.get('alias', x['name'])
                   for x in conf['agents']][:len(all_steps_alive)]
    # same per checkpoint

    ax[0].set(xlabel='agent name', ylabel='steps', title="Survival comparison")
    ax[0].bar(agent_names,
              np.mean(np.array(all_steps_alive), axis=1),
              label='mean steps',
              color='dimgrey')
    for label in ax[0].get_xticklabels():
        label.set_rotation(30)
        label.set_ha('right')

    ax[1].set(xlabel='agent name', title="Score comparison")
    ax[1].bar(agent_names,
              np.mean(np.array(all_scores), axis=1),
              label='mean score',
              color='crimson')
    ax[1].set_ylabel('score')
    for label in ax[1].get_xticklabels():
        label.set_rotation(30)
        label.set_ha('right')

    ax[2].set(xlabel='agent name', title="Reward comparison")
    ax[2].bar(agent_names,
              np.mean(np.array(all_game_rewards_mean), axis=1),
              label='mean reward',
              color='navy')
    ax[2].set_ylabel('reward')
    for label in ax[2].get_xticklabels():
        label.set_rotation(30)
        label.set_ha('right')

    # bar colors for the per-event comparison charts, indexed by EVENTS_CHART
    color_moves = [
        'dodgerblue', 'deepskyblue', 'limegreen', 'slategrey', 'seagreen',
        'coral', 'orangered'
    ]
    for i, ev in enumerate(EVENTS_CHART):

        ax[3 + i].set(xlabel='agent name', title=f"{ev} comparison")
        ax[3 + i].bar(agent_names,
                      np.mean(np.array(all_events[ev]), axis=1),
                      label=f"{ev} mean",
                      color=color_moves[i])
        ax[3 + i].set_ylabel(f"{ev}")
        for label in ax[3 + i].get_xticklabels():
            label.set_rotation(30)
            label.set_ha('right')

    score_sums = np.sum(np.array(all_scores_others), axis=1).T
    score_sums = np.vstack((np.sum(np.array(all_scores),
                                   axis=1), score_sums)).astype(np.float32)

    total_scores = np.sum(score_sums, axis=0).astype(np.float32)

    colors = ['#0197F6', 'dimgrey', 'darkgray', 'gainsboro']
    labels = ['agent', 'enemy #1', 'enemy #2', 'enemy #3']
    previous = np.zeros_like(score_sums[0])
    for i in range(len(score_sums)):
        score_sums[i] = np.true_divide(score_sums[i], total_scores)

        ax[3 + len(EVENTS_CHART)].bar(agent_names,
                                      score_sums[i],
                                      bottom=previous,
                                      color=colors[i],
                                      label=labels[i])
        previous += score_sums[i]

    ax[3 + len(EVENTS_CHART)].set(xlabel='agent name',
                                  title='score distribution across all games')
    ax[3 + len(EVENTS_CHART)].set_ylabel('score distribution')
    ax[3 + len(EVENTS_CHART)].set_ylim(0, 1)
    ax[3 + len(EVENTS_CHART)].legend(bbox_to_anchor=(1, 1), handlelength=0.8)
    for label in ax[3 + len(EVENTS_CHART)].get_xticklabels():
        label.set_rotation(30)
        label.set_ha('right')

    got_killed = np.mean(np.array(all_events['GOT_KILLED']), axis=1)
    killed_self = np.mean(np.array(all_events['KILLED_SELF']), axis=1)
    got_killed -= killed_self
    survived_round = np.mean(np.array(all_events['SURVIVED_ROUND']), axis=1)

    survival = np.vstack(
        (survived_round, killed_self, got_killed)).astype(np.float32)

    colors = ['springgreen', 'salmon', 'red']
    labels = ['survived', 'killed self', 'killed by enemy']
    previous = np.zeros_like(survived_round)
    for i in range(len(survival)):
        ax[4 + len(EVENTS_CHART)].bar(agent_names,
                                      survival[i],
                                      bottom=previous,
                                      color=colors[i],
                                      label=labels[i])
        previous += survival[i]

    ax[4 + len(EVENTS_CHART)].set(xlabel='agent name',
                                  title='survival distribution across all games')
    ax[4 + len(EVENTS_CHART)].set_ylabel('survival distribution')
    ax[4 + len(EVENTS_CHART)].set_ylim(0, 1)
    ax[4 + len(EVENTS_CHART)].legend(bbox_to_anchor=(1, 1), handlelength=0.8)
    for label in ax[4 + len(EVENTS_CHART)].get_xticklabels():
        label.set_rotation(30)
        label.set_ha('right')

    ax[5 + len(EVENTS_CHART)].set_visible(False)

    fig.savefig(f"eval/{compare_name}_comparison.png", dpi=300)
Example #4
File: train.py Project: jfreyberg/ifml
def main(argv=None):
    play_parser = ArgumentParser()

    # Run arguments
    agent_group = play_parser.add_mutually_exclusive_group()
    agent_group.add_argument(
        "--my-agent",
        type=str,
        help="Play agent of name ... against three rule_based_agents")
    agent_group.add_argument("--agents",
                             type=str,
                             nargs="+",
                             default=["rule_based_agent"] * s.MAX_AGENTS,
                             help="Explicitly set the agent names in the game")
    play_parser.add_argument(
        "--train",
        default=0,
        type=int,
        choices=[0, 1, 2, 3, 4],
        help="First … agents should be set to training mode")
    play_parser.add_argument("--continue-without-training",
                             default=False,
                             action="store_true")
    # play_parser.add_argument("--single-process", default=False, action="store_true")

    play_parser.add_argument("--n-rounds",
                             type=int,
                             default=-1,
                             help="How many rounds to play")
    play_parser.add_argument("--n-steps",
                             type=int,
                             default=-1,
                             help="How many steps to play")
    play_parser.add_argument("--reload-steps",
                             type=int,
                             default=10000,
                             help="How many steps until reload")

    args = play_parser.parse_args(argv)
    args.no_gui = True
    args.make_video = False
    args.log_dir = '/tmp'

    # Initialize environment and agents
    agents = []
    if args.train == 0 and not args.continue_without_training:
        args.continue_without_training = True
    if args.my_agent:
        agents.append((args.my_agent, len(agents) < args.train))
        args.agents = ["rule_based_agent"] * (s.MAX_AGENTS - 1)
    for agent_name in args.agents:
        agents.append((agent_name, len(agents) < args.train))

    world = TrainWorld(args, agents)

    step_counter = 0
    round_counter = 0
    prev_load_counter = 0

    tqdm_iter_count = args.n_rounds if args.n_rounds != -1 else args.n_steps
    pbar = tqdm(total=tqdm_iter_count)
    # Run one or more games
    done = False
    while not done:
        if prev_load_counter + args.reload_steps <= step_counter:
            prev_load_counter = step_counter
            print('trying to update agents')
            # Capture the cwd before the try block, so the chdir in the
            # finally clause cannot hit an undefined name if the attribute
            # lookup below raises.
            prev_cwd = os.getcwd()
            try:
                save_step = world.agents[
                    0].backend.runner.fake_self.agent.save_step - 1
                if save_step >= 0:
                    os.chdir(
                        f'./agent_code/{world.agents[0].backend.code_name}/')
                    for a in world.agents[1:]:
                        try:
                            a.backend.runner.fake_self.agent.load(save_step)
                            print(
                                f'reloaded agent {a.name} for step {save_step}'
                            )
                        except Exception as e:
                            print(f'{a.name} does not support loading!')
                            print(e)
            except Exception as e:
                print('first agent is not one of us!')
                print(e)
            finally:
                os.chdir(prev_cwd)
        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # Main game loop
        round_finished = False
        while not round_finished:
            if world.running:
                if step_counter >= args.n_steps and args.n_steps != -1:
                    world.end_round()
                world.do_step('WAIT')
                step_counter += 1
                if args.n_rounds == -1:
                    pbar.update(1)
            else:
                round_finished = True
                round_counter += 1
                if args.n_steps == -1:
                    pbar.update(1)

        if step_counter >= args.n_steps and args.n_steps != -1:
            done = True
        if round_counter >= args.n_rounds and args.n_rounds != -1:
            done = True

    world.end()

    print(f'steps trained: {step_counter}')
    print(f'rounds trained: {round_counter}')
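
As with the other entry points, the argv parameter makes the trainer scriptable. A hedged sketch of a typical call; per the header this main() lives in train.py, while the agent name and step counts are illustrative:

from train import main

# Train the first agent for 50,000 steps against three rule_based_agents;
# every 10,000 steps the loop tries to reload the opponents from the lead
# agent's newest checkpoint (agents without load() just print a notice).
main(["--my-agent", "revised_dq_agent", "--train", "1",
      "--n-steps", "50000", "--reload-steps", "10000"])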