예제 #1
0
def main():
    """Bootstrap an interactive Pommerman game.

    Builds a four-agent match (two keyboard-controlled players and two
    ``StopAgent`` instances) and plays 100 rendered episodes.  Use this as
    an example to set up your training env.
    """
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    # Create a set of agents (exactly four).  Other agents, e.g.
    # agents.SimpleAgent() or
    # agents.DockerAgent("pommerman/simple-agent", port=12345),
    # can be swapped in here.
    agent_list = [
        # W,A,S,D to move, E to lay bomb
        agents.PlayerAgent(agent_control="wasd"),
        StopAgent(),
        StopAgent(),
        # arrows to move, space to lay bomb
        agents.PlayerAgent(agent_control="arrows"),
    ]
    # Make the 'PommeRadioCompetition-v2' environment using the agent list
    env = pommerman.make('PommeRadioCompetition-v2', agent_list)

    try:
        # Run the episodes just like OpenAI Gym
        for i_episode in range(100):
            print('Start to reset')
            state = env.reset()
            print('Reset complete')
            done = False
            while not done:
                actions = env.act(state)
                state, _reward, done, info = env.step(actions)
                # Exercise the bomb-life feature on agent 0's observation;
                # the returned value itself is not needed here.
                feature_utils.get_bomb_life(state[0])
                env.render()
            print(info)
            print('Episode {} finished'.format(i_episode))
    finally:
        # Always release the environment, even when an episode raises or
        # the user interrupts the game.
        env.close()
예제 #2
0
    agents.SimpleAgent(),
    # agents.SimpleAgent(),
    # agents.DockerAgent('multiagentlearning/hakozakijunctions', port=1023),
    # agents.DockerAgent('tu2id4n/hit_pmm:fix2', port=1025),
    # agents.SimpleAgent()
    # agents.SimpleAgent()
]
# Build the 'OneVsOne-v0' environment from the agent list defined above.
env = pommerman.make('OneVsOne-v0', agent_list)

try:
    for episode in tqdm(range(1000)):
        obs = env.reset()
        done = False
        while not done:
            all_actions = env.act(obs)
            obs, rewards, done, info = env.step(all_actions)
            # Exercise the bomb-life feature on agent 0's observation with
            # rang=8 on every step.
            bomb_life = feature_utils.get_bomb_life(obs_nf=obs[0], rang=8)
            env.render()
        print(info)
    print('1000 test ok')
finally:
    # Release the environment even when an episode raises.
    env.close()

# import numpy as np
#
# f_path = 'dataset/hako_v2/228n5_5.npz'
# sub_data = np.load(f_path, allow_pickle=True)
# obs = sub_data['obs']
# actions = sub_data['actions']
# del sub_data
# print(obs[0])
예제 #3
0
def get_rewards_8m8(agents,
                    step_count,
                    max_steps,
                    whole_obs_pre,
                    whole_obs,
                    act_abs_pres,
                    idx=0):
    """Compute the shaped step reward for agent ``idx`` on an 8x8 board.

    A shaping term is accumulated from the previous observation, the current
    observation and the previous abstract action, and is then combined with
    the game outcome.

    Args:
        agents: Sequence of agent objects exposing ``is_alive``.
        step_count: Step counter, compared against ``max_steps``.
        max_steps: Step limit; ``step_count >= max_steps`` counts as a
            timeout.
        whole_obs_pre: Observations before the step, indexed by ``idx``.
        whole_obs: Observations after the step, indexed by ``idx``.
        act_abs_pres: Previous abstract action.  It is forwarded as-is (not
            indexed by ``idx``) to ``feature_utils._djikstra_act`` and
            ``feature_utils.extra_goal``.
        idx: Index of the observed agent (default ``0``).

    Returns:
        list: If exactly one agent is alive, one entry per agent: the
        shaping term plus 1 for the living agent and minus 1 for the others.
        Otherwise a two-element list holding the shaping term minus 1 on
        timeout, or the bare shaping term while the game is running.
    """
    print_info(' ', ' ')

    # Every helper call and board scan below is limited to this size.
    board_size = 8

    alive_agents = [num for num, agent in enumerate(agents) if agent.is_alive]

    # Work on copies so that the feature helpers cannot alter the caller's
    # observations.
    obs_pre = copy.deepcopy(whole_obs_pre[idx])
    obs_now = copy.deepcopy(whole_obs[idx])
    act_abs_pre = act_abs_pres
    position_pre = obs_pre['position']
    position_now = obs_now['position']

    bomb_life_now = feature_utils.get_bomb_life(obs_now, rang=board_size)
    bomb_life_pre = feature_utils.get_bomb_life(obs_pre, rang=board_size)
    # NOTE(review): presumably the blast map of the bomb lying at
    # position_now - confirm in feature_utils.
    my_bomb_life_now = feature_utils.get_my_bomb_life(bomb_life_now,
                                                      position_now,
                                                      rang=board_size)

    extrabomb = constants.Item.ExtraBomb.value
    kick = constants.Item.Kick.value
    incrrange = constants.Item.IncrRange.value
    bomb = constants.Item.Bomb.value
    wood = constants.Item.Wood.value
    agent1 = constants.Item.Agent1.value
    agent3 = constants.Item.Agent3.value
    teamate = obs_pre['teammate'].value
    powerups = [extrabomb, kick, incrrange]
    board_pre = obs_pre['board']

    reward = 0

    # The primitive action and the goal of the previous abstract action are
    # only used to decide which shaping rules apply.
    act_pre = feature_utils._djikstra_act(obs_pre, act_abs_pre,
                                          rang=board_size)
    goal_pre = feature_utils.extra_goal(act_abs_pre, obs_pre,
                                        rang=board_size)

    if act_pre == 5:
        # The agent laid a bomb.
        if obs_pre['ammo'] == 0:
            # Tried to lay a bomb without ammo.
            reward -= 0.1
            print_info('No ammo', '-0.1')
        else:
            # Score every cell covered by the agent's own bomb.
            for r in range(board_size):
                for c in range(board_size):
                    if my_bomb_life_now[(r, c)] > 0:
                        cell = board_pre[(r, c)]
                        if cell == wood:
                            reward += 0.2
                            print_info('bomb -> wood', '+0.2')
                        if cell in [agent1, agent3]:
                            reward += 0.3
                            print_info('bomb -> enemy', '+0.3')
                        if cell in powerups:
                            reward -= 0.05
                            print_info('bomb -> powerup', '-0.05')
                        if cell == teamate:
                            reward -= 0.05
                            print_info('bomb -> teammate', '-0.05')
    elif act_pre == 0:
        # The agent stood still although its goal was somewhere else.
        if obs_pre['position'] != goal_pre:
            print_info('obs_pre', obs_pre['position'])
            print_info('goal_pre', goal_pre)
            reward -= 0.1
            print_info('Faultal goal', '-0.1')
    else:
        # The agent chose a move; only score it if the position changed.
        if position_now != position_pre:
            # Reward kicking a bomb.
            if obs_pre['can_kick']:
                if board_pre[goal_pre] == bomb:
                    reward += 0.01
                    print_info('Want to kick', '+0.01')
                if board_pre[position_now] == bomb:
                    reward += 0.2
                    print_info('Kick', '+0.2')
            # Moved from a safe position into a bomb's blast range.
            if bomb_life_pre[
                    position_pre] == 0 and bomb_life_now[position_now] > 0:
                reward -= 0.15
                print_info('Enter the explosion range', '-0.15')
            # Was inside a blast range and headed for a safe goal.
            if bomb_life_pre[position_pre] > 0 and bomb_life_pre[goal_pre] == 0:
                reward += 0.05
                print_info('Escape from the explosin range ', '+0.05')
            # Heading for an item ...
            if board_pre[goal_pre] in powerups:
                reward += 0.01
                print_info('Want a Item', '+0.01')
                # ... and picked one up on this step.
                if board_pre[position_now] in powerups:
                    reward += 0.1
                    print_info('Eat a Item', '+0.1')
            # Picked up an item without having aimed for one.
            elif board_pre[position_now] in powerups:
                reward += 0.05
                print_info('Passing by a Item', '+0.05')

    if len(alive_agents) == 1:
        # An agent won. Give them +1, the other -1.
        print_info('Game Over', int(agents[0].is_alive))
        return [reward + 2 * int(agent.is_alive) - 1 for agent in agents]
    elif step_count >= max_steps:
        # Game is over from time. Everyone gets -1.
        return [reward - 1] * 2
    else:
        # Game running
        return [reward] * 2
예제 #4
0
def get_rewards_v3_8(agents, step_count, max_steps, whole_obs_pre, whole_obs,
                     act_abs_pres, idx):
    """Return the scalar shaped reward of agent ``idx`` for one step.

    The shaping term is built from the agent's previous and current
    observation plus its previous abstract action, then adjusted by the
    team outcome (agents 0 and 2 versus agents 1 and 3).

    Args:
        agents: Sequence of agent objects exposing ``is_alive``.
        step_count: Step counter, compared against ``max_steps``.
        max_steps: Step limit; ``step_count >= max_steps`` is a timeout.
        whole_obs_pre: Per-agent observations before the step.
        whole_obs: Per-agent observations after the step.
        act_abs_pres: Per-agent previous abstract actions.
        idx: Index of the agent being rewarded.

    Returns:
        The shaping term, offset by +1 (team [0, 2] wins), -1 (both enemies
        survive, or plain timeout) or -0.6 (one enemy dead on loss or
        timeout); the bare shaping term in every other case.
    """
    survivors = [i for i, member in enumerate(agents) if member.is_alive]

    # Copies keep the feature helpers from touching the caller's data.
    prev_obs = copy.deepcopy(whole_obs_pre[idx])
    curr_obs = copy.deepcopy(whole_obs[idx])
    prev_abstract = act_abs_pres[idx]
    prev_pos = prev_obs['position']
    curr_pos = curr_obs['position']

    curr_blast = feature_utils.get_bomb_life(curr_obs)
    prev_blast = feature_utils.get_bomb_life(prev_obs)
    own_blast = feature_utils.get_my_bomb_life(curr_blast, curr_pos)

    items = constants.Item
    powerups = (items.ExtraBomb.value, items.Kick.value,
                items.IncrRange.value)
    bomb_tile = items.Bomb.value
    wood_tile = items.Wood.value
    enemy_tiles = (items.Agent1.value, items.Agent3.value)
    mate_tile = prev_obs['teammate'].value
    prev_board = prev_obs['board']

    score = 0

    # The agent stands on a cell whose bomb life is between 1 and 3:
    # treated as being blown up.
    if 0 < curr_blast[curr_pos] < 4:
        score -= 1
        print_info('You dead', '-1')

    # Only used to pick the shaping rules below.
    prev_act = feature_utils._djikstra_act(prev_obs, prev_abstract)
    prev_goal = feature_utils.extra_goal(prev_abstract, prev_obs)

    if prev_act == 5:
        # A bomb was laid.
        if prev_obs['ammo'] == 0:
            # ... without any ammo.
            score -= 0.1
            print_info('No ammo', '-0.1')
        else:
            hit_something = False
            # Walk the 11x11 board row by row and score each covered cell.
            for cell in ((row, col) for row in range(11)
                         for col in range(11)):
                if not own_blast[cell] > 0:
                    continue
                tile = prev_board[cell]
                if tile == wood_tile:
                    score += 0.2
                    hit_something = True
                    print_info('bomb -> wood', '+0.2')
                if tile in enemy_tiles:
                    score += 0.3
                    hit_something = True
                    print_info('bomb -> enemy', '+0.3')
                if tile in powerups:
                    score -= 0.05
                    print_info('bomb -> powerup', '-0.05')
                if tile == mate_tile:
                    score -= 0.05
                    print_info('bomb -> teammate', '-0.05')
            if not hit_something:
                score -= 0.1
                print_info('Useless bomb', '-0.1')
    elif prev_act == 0:
        # Stood still although the goal was elsewhere.
        if prev_obs['position'] != prev_goal:
            score -= 0.1
            print_info('Faultal goal', '-0.1')
    elif curr_pos != prev_pos:
        # A move that actually changed the position.
        if prev_obs['can_kick']:
            # Reward kicking a bomb.
            if prev_board[prev_goal] == bomb_tile:
                score += 0.01
                print_info('Want to kick', '+0.01')
            if prev_board[curr_pos] == bomb_tile:
                score += 0.2
                print_info('Kick', '+0.2')
        # Stepped from a safe cell into a blast range.
        if prev_blast[prev_pos] == 0 and curr_blast[curr_pos] > 0:
            score -= 0.15
            print_info('Enter the explosion range', '-0.15')
        # Was inside a blast range while heading for a safe goal.
        if prev_blast[prev_pos] > 0 and prev_blast[prev_goal] == 0:
            score += 0.05
            print_info('Escape from the explosin range ', '+0.05')
        if prev_board[prev_goal] in powerups:
            # Heading for an item ...
            score += 0.01
            print_info('Want a Item', '+0.01')
            if prev_board[curr_pos] in powerups:
                # ... and collected one.
                score += 0.1
                print_info('Eat a Item', '+0.1')
        elif prev_board[curr_pos] in powerups:
            # Collected an item without aiming for one.
            score += 0.05
            print_info('Passing by a Item', '+0.05')

    # We are playing a team game.
    if survivors in ([0, 2], [0], [2]):
        # Team [0, 2] wins.
        outcome = score + 1
        print_info('Wins and agent0 alive.', outcome)
        return outcome
    if survivors == [1, 3]:
        # Team [1, 3] wins and no enemy dead.
        outcome = score - 1
        print_info('Loss and no enemy dead.', outcome)
        return outcome
    if survivors in ([1], [3]):
        # Team [1, 3] wins and one enemy dead.
        outcome = score - 0.6
        print_info('Loss and one enemy dead.', outcome)
        return outcome

    timed_out = step_count >= max_steps
    if timed_out and survivors in ([0, 1], [0, 1, 2], [0, 3], [0, 2, 3]):
        # Tie and one enemy dead.
        outcome = score - 0.6
        print_info('Tie and one enemy dead.', outcome)
        return outcome
    if timed_out:
        # Game is over by max_steps. All agents tie.
        outcome = score - 1
        print_info('Game is over by max_steps. All agents tie.', outcome)
        return outcome
    if len(survivors) == 0:
        # Everyone's dead. All agents tie.
        print_info('Everyone is dead. All agents tie.', score)
        return score
    # No team has yet won or lost.
    return score
예제 #5
0
def get_rewards_v3_7(agents, step_count, max_steps, whole_obs_pre, whole_obs,
                     act_abs_pre):
    """Compute per-agent rewards, with shaping applied to agent 0.

    The shaping term is built from agent 0's previous and current
    observation and its previous abstract action.  It is then combined with
    the team outcome (agents 0 and 2 versus agents 1 and 3).

    Args:
        agents: Sequence of agent objects exposing ``is_alive``.
        step_count: Step counter, compared against ``max_steps``.
        max_steps: Step limit; ``step_count >= max_steps`` is a timeout.
        whole_obs_pre: Per-agent observations before the step; only entry 0
            is read.
        whole_obs: Per-agent observations after the step; only entry 0 is
            read.
        act_abs_pre: Previous abstract action of agent 0, forwarded to
            ``feature_utils._djikstra_act`` and ``feature_utils.extra_goal``.

    Returns:
        list: Four rewards.  On a decided game, entry 0 is the shaping term
        plus the outcome offset and entries 1-3 are fixed +/-1 values; on a
        plain timeout every entry is the shaping term minus 1; otherwise
        every entry is the shaping term.
    """

    def any_lst_equal(lst, values):
        """Return True if ``lst`` equals any of the lists in ``values``."""
        return any(lst == v for v in values)

    alive_agents = [num for num, agent in enumerate(agents) if agent.is_alive]

    # Work on copies so that the feature helpers cannot alter the caller's
    # observations.
    obs_pre = copy.deepcopy(whole_obs_pre[0])
    obs_now = copy.deepcopy(whole_obs[0])

    position_pre = obs_pre['position']
    position_now = obs_now['position']

    bomb_life_now = feature_utils.get_bomb_life(obs_now)
    bomb_life_pre = feature_utils.get_bomb_life(obs_pre)
    # NOTE(review): presumably the blast map of the bomb lying at
    # position_now - confirm in feature_utils.
    my_bomb_life_now = feature_utils.get_my_bomb_life(bomb_life_now,
                                                      position_now)

    extrabomb = constants.Item.ExtraBomb.value
    kick = constants.Item.Kick.value
    incrrange = constants.Item.IncrRange.value
    bomb = constants.Item.Bomb.value
    wood = constants.Item.Wood.value
    agent1 = constants.Item.Agent1.value
    agent3 = constants.Item.Agent3.value
    agent2 = constants.Item.Agent2.value
    powerups = [extrabomb, kick, incrrange]
    board_pre = obs_pre['board']

    reward = 0

    # The agent stands on a cell whose bomb life is between 1 and 3:
    # treated as being blown up.
    if 0 < bomb_life_now[position_now] < 4:
        reward -= 1
        print_info('You dead', '-1')

    # The primitive action and the goal of the previous abstract action are
    # only used to decide which shaping rules apply.
    act_pre = feature_utils._djikstra_act(obs_pre, act_abs_pre)
    goal_pre = feature_utils.extra_goal(act_abs_pre, obs_pre)

    if act_pre == 5:
        # The agent laid a bomb.
        if obs_pre['ammo'] == 0:
            # Tried to lay a bomb without ammo.
            reward -= 0.1
            print_info('No ammo', '-0.1')
        else:
            nothing = True
            # Score every cell of the 11x11 board covered by the agent's
            # own bomb.
            for r in range(11):
                for c in range(11):
                    if my_bomb_life_now[(r, c)] > 0:
                        cell = board_pre[(r, c)]
                        if cell == wood:
                            reward += 0.2
                            nothing = False
                            print_info('bomb -> wood', '+0.2')
                        if cell in [agent1, agent3]:
                            reward += 0.3
                            nothing = False
                            print_info('bomb -> enemy', '+0.3')
                        if cell in powerups:
                            reward -= 0.05
                            print_info('bomb -> powerup', '-0.05')
                        if cell == agent2:
                            reward -= 0.05
                            print_info('bomb -> teammate', '-0.05')
            # The bomb reaches neither wood nor an enemy.
            if nothing:
                reward -= 0.1
                print_info('Useless bomb', '-0.1')
    elif act_pre == 0:
        # The agent stood still although its goal was somewhere else.
        if obs_pre['position'] != goal_pre:
            reward -= 0.1
            print_info('Faultal goal', '-0.1')
    else:
        # The agent chose a move; only score it if the position changed.
        if position_now != position_pre:
            # Reward kicking a bomb.
            if obs_pre['can_kick']:
                if board_pre[goal_pre] == bomb:
                    reward += 0.01
                    print_info('Want to kick', '+0.01')
                if board_pre[position_now] == bomb:
                    reward += 0.2
                    print_info('Kick', '+0.2')
            # Moved from a safe position into a bomb's blast range.
            if bomb_life_pre[
                    position_pre] == 0 and bomb_life_now[position_now] > 0:
                reward -= 0.15
                print_info('Enter the explosion range', '-0.15')
            # Was inside a blast range and headed for a safe goal.
            if bomb_life_pre[position_pre] > 0 and bomb_life_pre[goal_pre] == 0:
                reward += 0.05
                print_info('Escape from the explosin range ', '+0.05')
            # Heading for an item ...
            if board_pre[goal_pre] in powerups:
                reward += 0.01
                print_info('Want a Item', '+0.01')
                # ... and picked one up on this step.
                if board_pre[position_now] in powerups:
                    reward += 0.1
                    print_info('Eat a Item', '+0.1')
            # Picked up an item without having aimed for one.
            elif board_pre[position_now] in powerups:
                reward += 0.05
                print_info('Passing by a Item', '+0.05')

    # We are playing a team game.
    if any_lst_equal(alive_agents, [[0, 2], [0], [2]]):
        # Team [0, 2] wins.
        print_info('Wins and agent0 alive.', reward + 1)
        return [reward + 1, -1, 1, -1]
    elif any_lst_equal(alive_agents, [[1, 3]]):
        # Team [1, 3] wins and no enemy dead.
        print_info('Loss and no enemy dead.', reward - 1)
        return [reward - 1, 1, -1, 1]
    elif any_lst_equal(alive_agents, [[1], [3]]):
        # Team [1, 3] wins and one enemy dead.
        print_info('Loss and one enemy dead.', reward - 0.6)
        return [reward - 0.6, 1, -1, 1]
    elif step_count >= max_steps and any_lst_equal(
            alive_agents, [[0, 1], [0, 1, 2], [0, 3], [0, 2, 3]]):
        # Tie and one enemy dead.
        print_info('Tie and one enemy dead.', reward - 0.6)
        return [reward - 0.6, 1, -1, 1]
    elif step_count >= max_steps:
        # Game is over by max_steps. All agents tie.
        print_info('Game is over by max_steps. All agents tie.', reward - 1)
        return [reward - 1] * 4
    elif len(alive_agents) == 0:
        # Everyone's dead. All agents tie.
        print_info('Everyone is dead. All agents tie.', reward)
        return [reward] * 4
    else:
        # No team has yet won or lost.
        return [reward] * 4
예제 #6
0
def get_rewards_v3_6(agents, step_count, max_steps, whole_obs_pre, whole_obs,
                     act_abs_pre):
    """Compute per-agent rewards, with shaping applied to agent 0.

    The shaping term is built from agent 0's previous and current
    observation and its previous abstract action.  It is then combined with
    the team outcome (agents 0 and 2 versus agents 1 and 3).

    Args:
        agents: Sequence of agent objects exposing ``is_alive``.
        step_count: Step counter, compared against ``max_steps``.
        max_steps: Step limit; ``step_count >= max_steps`` is a timeout.
        whole_obs_pre: Per-agent observations before the step; only entry 0
            is read.
        whole_obs: Per-agent observations after the step; only entry 0 is
            read.
        act_abs_pre: Previous abstract action of agent 0, forwarded to
            ``feature_utils._djikstra_act`` and ``feature_utils.extra_goal``.

    Returns:
        list: Four rewards.  On a decided game, entry 0 is the shaping term
        plus the outcome offset and entries 1-3 are fixed +/-1 values; on a
        plain timeout every entry is the shaping term minus 1; when nobody
        is alive every entry is the shaping term plus 0.5; otherwise every
        entry is the shaping term.
    """

    def any_lst_equal(lst, values):
        """Return True if ``lst`` equals any of the lists in ``values``."""
        return any(lst == v for v in values)

    alive_agents = [num for num, agent in enumerate(agents) if agent.is_alive]

    # Work on copies so that the feature helpers cannot alter the caller's
    # observations.
    obs_pre = copy.deepcopy(whole_obs_pre[0])
    obs_now = copy.deepcopy(whole_obs[0])

    position_pre = obs_pre['position']
    position_now = obs_now['position']

    bomb_life_now = feature_utils.get_bomb_life(obs_now)
    bomb_life_pre = feature_utils.get_bomb_life(obs_pre)
    # NOTE(review): presumably the blast map of the bomb lying at
    # position_now - confirm in feature_utils.
    my_bomb_life_now = feature_utils.get_my_bomb_life(bomb_life_now,
                                                      position_now)

    extrabomb = constants.Item.ExtraBomb.value
    kick = constants.Item.Kick.value
    incrrange = constants.Item.IncrRange.value
    wood = constants.Item.Wood.value
    agent1 = constants.Item.Agent1.value
    agent3 = constants.Item.Agent3.value
    powerups = [extrabomb, kick, incrrange]
    board_pre = obs_pre['board']

    reward = 0

    # The agent stands on a cell whose bomb life is between 1 and 3:
    # treated as being blown up.
    if 0 < bomb_life_now[position_now] < 4:
        reward -= 0.5
        print_info('自己被炸死', '-0.5')

    # The primitive action and the goal of the previous abstract action are
    # only used to decide which shaping rules apply.
    act_pre = feature_utils._djikstra_act(obs_pre, act_abs_pre)
    goal_pre = feature_utils.extra_goal(act_abs_pre, obs_pre)

    if act_pre == 5:
        # The agent laid a bomb.
        if obs_pre['ammo'] == 0:
            # Tried to lay a bomb without ammo.
            reward -= 0.1
            print_info('没有ammo放炸弹', '-0.1')
        else:
            # Score every cell of the 11x11 board covered by the agent's
            # own bomb.
            for r in range(11):
                for c in range(11):
                    if my_bomb_life_now[(r, c)] > 0:
                        cell = board_pre[(r, c)]
                        if cell == wood:
                            reward += 0.2
                            print_info('炸弹波及到wood', '+0.2')
                        if cell in [agent1, agent3]:
                            reward += 0.3
                            print_info('炸弹波及到敌人', '+0.3')
    elif act_pre == 0:
        # The agent stood still although its goal was somewhere else.
        if obs_pre['position'] != goal_pre:
            reward -= 0.01
            print_info('无效移动', '-0.01')
    else:
        # The agent chose a move.
        # Was inside a blast range and headed for a safe goal.
        if bomb_life_pre[position_pre] > 0 and bomb_life_pre[goal_pre] == 0:
            reward += 0.05
            print_info('被炸弹波及向着安全的位置移动', '+0.05')
        # Heading for an item ...
        if board_pre[goal_pre] in powerups:
            reward += 0.01
            print_info('向items移动', '+0.01')
            # ... and picked one up on this step.  The logged amount now
            # matches the 0.3 that is actually added.
            if board_pre[position_now] in powerups:
                reward += 0.3
                print_info('向着item移动并吃到items', '+0.3')
        # Picked up an item without having aimed for one.
        elif board_pre[position_now] in powerups:
            reward += 0.05
            print_info('路过吃到items', '+0.05')

    # We are playing a team game.
    if any_lst_equal(alive_agents, [[0, 2], [0], [2]]):
        # Team [0, 2] wins.
        print_info('Team [0, 2] wins and agent0 alive.', reward + 1)
        return [reward + 1, -1, 1, -1]
    elif any_lst_equal(alive_agents, [[1, 3]]):
        # Team [1, 3] wins and no enemy dead.
        print_info('Team [1, 3] wins and no enemy dead.', reward - 1)
        return [reward - 1, 1, -1, 1]
    elif any_lst_equal(alive_agents, [[1], [3]]):
        # Team [1, 3] wins and one enemy dead.
        print_info('Team [1, 3] wins and one enemy dead.', reward + 0.5)
        return [reward + 0.5, 1, -1, 1]
    elif step_count >= max_steps and any_lst_equal(
            alive_agents, [[0, 1], [0, 1, 2], [0, 3], [0, 2, 3]]):
        # Tie and one enemy dead.
        print_info('tie and one enemy dead.', reward + 0.5)
        return [reward + 0.5, 1, -1, 1]
    elif step_count >= max_steps:
        # Game is over by max_steps. All agents tie.
        print_info('Game is over by max_steps. All agents tie.', reward - 1)
        return [reward - 1] * 4
    elif len(alive_agents) == 0:
        # Everyone's dead. All agents tie.
        print_info('Everyone is dead. All agents tie.', reward + 0.5)
        return [reward + 0.5] * 4
    else:
        # No team has yet won or lost.
        return [reward] * 4