def main():
    """Bootstrap an interactive Pommerman match.

    Use this as an example of how to set up a training environment: it
    prints the environment registry, builds a four-agent line-up (two
    keyboard-controlled players and two ``StopAgent`` instances), and plays
    100 rendered episodes of ``PommeRadioCompetition-v2``.

    The environment is closed even when an episode raises or the run is
    interrupted from the keyboard.
    """
    # Print all possible environments in the Pommerman registry.
    print(pommerman.REGISTRY)

    # Create a set of agents (exactly four). Other line-ups that can be
    # swapped in here: agents.SimpleAgent(), RandAgent(), SuicideAgent(),
    # agents.DockerAgent("pommerman/simple-agent", port=12345).
    agent_list = [
        agents.PlayerAgent(agent_control="wasd"),  # W,A,S,D to move, E to lay bomb
        StopAgent(),
        StopAgent(),
        agents.PlayerAgent(agent_control="arrows"),  # arrows to move, space to lay bomb
    ]

    # Make the radio-competition environment using the agent list.
    env = pommerman.make('PommeRadioCompetition-v2', agent_list)

    try:
        # Run the episodes just like OpenAI Gym.
        for i_episode in range(100):
            print('Start to reset')
            state = env.reset()
            print('Reset complete')

            done = False
            while not done:
                actions = env.act(state)
                state, _, done, info = env.step(actions)
                # Result is unused; the call is kept so the feature helper
                # is still exercised on every live observation.
                feature_utils.get_bomb_life(state[0])
                env.render()

            print(info)
            print('Episode {} finished'.format(i_episode))
    finally:
        # Always release the renderer/window, even on Ctrl-C or an error.
        env.close()
agents.SimpleAgent(), # agents.SimpleAgent(), # agents.DockerAgent('multiagentlearning/hakozakijunctions', port=1023), # agents.DockerAgent('tu2id4n/hit_pmm:fix2', port=1025), # agents.SimpleAgent() # agents.SimpleAgent() ] env = pommerman.make('OneVsOne-v0', agent_list) for episode in tqdm(range(1000)): obs = env.reset() done = False while not done: all_actions = env.act(obs) obs, rewards, done, info = env.step(all_actions) bomb_life = feature_utils.get_bomb_life(obs_nf=obs[0], rang=8) # print(obs) env.render() print(info) print('1000 test ok') env.close() # import numpy as np # # f_path = 'dataset/hako_v2/228n5_5.npz' # sub_data = np.load(f_path, allow_pickle=True) # obs = sub_data['obs'] # actions = sub_data['actions'] # del sub_data # print(obs[0])
def get_rewards_8m8(agents, step_count, max_steps, whole_obs_pre, whole_obs,
                    act_abs_pres, idx=0):
    """Compute the shaped reward for agent ``idx`` on the 8x8 board.

    A shaping term is derived from the agent's previous observation, its
    current observation and the abstract action it chose, then combined with
    the game outcome.

    Args:
        agents: Sequence of agent objects exposing ``is_alive``.
        step_count: Number of steps played so far.
        max_steps: Step limit after which the game ends.
        whole_obs_pre: Per-agent observations before the action.
        whole_obs: Per-agent observations after the action.
        act_abs_pres: Abstract action taken by agent ``idx``. Unlike
            ``get_rewards_v3_8`` it is used as-is, not indexed by ``idx``.
        idx: Index of the agent whose observations are evaluated.

    Returns:
        A list with one reward per entry of ``agents``. Every entry carries
        the same shaping term; when exactly one agent is alive, +1 is added
        for the survivor and -1 for the others; on timeout every entry gets
        an extra -1.
    """
    print_info(' ', ' ')

    alive_agents = [num for num, agent in enumerate(agents) if agent.is_alive]

    # Work on copies so the feature helpers cannot touch the caller's data.
    obs_pre = copy.deepcopy(whole_obs_pre[idx])
    obs_now = copy.deepcopy(whole_obs[idx])
    act_abs_pre = act_abs_pres

    position_pre = obs_pre['position']
    position_now = obs_now['position']

    bomb_life_now = feature_utils.get_bomb_life(obs_now, rang=8)
    bomb_life_pre = feature_utils.get_bomb_life(obs_pre, rang=8)
    my_bomb_life_now = feature_utils.get_my_bomb_life(
        bomb_life_now, position_now, rang=8)

    extrabomb = constants.Item.ExtraBomb.value
    kick = constants.Item.Kick.value
    incrrange = constants.Item.IncrRange.value
    bomb = constants.Item.Bomb.value
    wood = constants.Item.Wood.value
    agent1 = constants.Item.Agent1.value
    agent3 = constants.Item.Agent3.value
    teamate = obs_pre['teammate'].value

    powerups = (extrabomb, kick, incrrange)
    enemies = (agent1, agent3)
    board_pre = obs_pre['board']

    reward = 0

    # Concrete action and target cell derived from the abstract action;
    # both are used only for the checks below.
    act_pre = feature_utils._djikstra_act(obs_pre, act_abs_pre, rang=8)
    goal_pre = feature_utils.extra_goal(act_abs_pre, obs_pre, rang=8)

    if act_pre == 5:
        # The agent tried to lay a bomb.
        if obs_pre['ammo'] == 0:
            # ... without any ammo.
            reward -= 0.1
            print_info('No ammo', '-0.1')
        else:
            # Score every cell covered by the agent's own bomb.
            for r in range(8):
                for c in range(8):
                    if my_bomb_life_now[(r, c)] > 0:
                        cell = board_pre[(r, c)]
                        if cell == wood:
                            reward += 0.2
                            print_info('bomb -> wood', '+0.2')
                        if cell in enemies:
                            reward += 0.3
                            print_info('bomb -> enemy', '+0.3')
                        if cell in powerups:
                            reward -= 0.05
                            print_info('bomb -> powerup', '-0.05')
                        if cell == teamate:
                            reward -= 0.05
                            print_info('bomb -> teammate', '-0.05')
    elif act_pre == 0:
        # The agent stood still although its goal was another cell.
        if obs_pre['position'] != goal_pre:
            print_info('obs_pre', obs_pre['position'])
            print_info('goal_pre', goal_pre)
            reward -= 0.1
            print_info('Faultal goal', '-0.1')
    else:
        # The agent chose a move.
        # NOTE(review): nesting below was reconstructed from a source whose
        # indentation was lost -- confirm against the original layout.
        if position_now != position_pre:
            # Reward kicking a bomb.
            if obs_pre['can_kick']:
                if board_pre[goal_pre] == bomb:
                    reward += 0.01
                    print_info('Want to kick', '+0.01')
                if board_pre[position_now] == bomb:
                    reward += 0.2
                    print_info('Kick', '+0.2')
            # Walked from a safe cell into a blast zone.
            if (bomb_life_pre[position_pre] == 0
                    and bomb_life_now[position_now] > 0):
                reward -= 0.15
                print_info('Enter the explosion range', '-0.15')
            # Was inside a blast zone and is heading for a safe cell.
            if bomb_life_pre[position_pre] > 0 and bomb_life_pre[goal_pre] == 0:
                reward += 0.05
                print_info('Escape from the explosin range ', '+0.05')
            if board_pre[goal_pre] in powerups:
                # Heading for an item ...
                reward += 0.01
                print_info('Want a Item', '+0.01')
                # ... and actually picked one up.
                if board_pre[position_now] in powerups:
                    reward += 0.1
                    print_info('Eat a Item', '+0.1')
            elif board_pre[position_now] in powerups:
                # Picked an item up while heading somewhere else.
                reward += 0.05
                print_info('Passing by a Item', '+0.05')

    if len(alive_agents) == 1:
        # An agent won. Give them +1, the other -1.
        print_info('Game Over', int(agents[0].is_alive))
        return [reward + 2 * int(agent.is_alive) - 1 for agent in agents]
    if step_count >= max_steps:
        # Game is over from time. Everyone gets -1.
        return [reward - 1] * len(agents)
    # Game running.
    return [reward] * len(agents)
def get_rewards_v3_8(agents, step_count, max_steps, whole_obs_pre, whole_obs,
                     act_abs_pres, idx):
    """Compute the shaped scalar reward for agent ``idx`` in a team game.

    A shaping term is derived from the agent's previous observation, its
    current observation and the abstract action it chose; a terminal bonus
    or penalty is then added depending on which agents are still alive.

    The outcome logic treats agents 0 and 2 as the rewarded team and the
    board values of Agent1/Agent3 as enemies.

    Args:
        agents: Sequence of agent objects exposing ``is_alive``.
        step_count: Number of steps played so far.
        max_steps: Step limit after which the game ends.
        whole_obs_pre: Per-agent observations before the action.
        whole_obs: Per-agent observations after the action.
        act_abs_pres: Per-agent abstract actions; entry ``idx`` is used.
        idx: Index of the agent whose reward is computed.

    Returns:
        A single number: the shaping term plus the outcome adjustment.
    """
    alive_agents = [num for num, agent in enumerate(agents) if agent.is_alive]

    # Work on copies so the feature helpers cannot touch the caller's data.
    obs_pre = copy.deepcopy(whole_obs_pre[idx])
    obs_now = copy.deepcopy(whole_obs[idx])
    act_abs_pre = act_abs_pres[idx]

    position_pre = obs_pre['position']
    position_now = obs_now['position']

    bomb_life_now = feature_utils.get_bomb_life(obs_now)
    bomb_life_pre = feature_utils.get_bomb_life(obs_pre)
    my_bomb_life_now = feature_utils.get_my_bomb_life(bomb_life_now,
                                                      position_now)

    extrabomb = constants.Item.ExtraBomb.value
    kick = constants.Item.Kick.value
    incrrange = constants.Item.IncrRange.value
    bomb = constants.Item.Bomb.value
    wood = constants.Item.Wood.value
    agent1 = constants.Item.Agent1.value
    agent3 = constants.Item.Agent3.value
    teamate = obs_pre['teammate'].value

    powerups = (extrabomb, kick, incrrange)
    enemies = (agent1, agent3)
    board_pre = obs_pre['board']

    reward = 0

    # The agent's own cell is about to blow up: count it as dying.
    if 0 < bomb_life_now[position_now] < 4:
        reward -= 1
        print_info('You dead', '-1')

    # Concrete action and target cell derived from the abstract action;
    # both are used only for the checks below.
    act_pre = feature_utils._djikstra_act(obs_pre, act_abs_pre)
    goal_pre = feature_utils.extra_goal(act_abs_pre, obs_pre)

    if act_pre == 5:
        # The agent tried to lay a bomb.
        if obs_pre['ammo'] == 0:
            # ... without any ammo.
            reward -= 0.1
            print_info('No ammo', '-0.1')
        else:
            nothing = True
            # Score every cell covered by the agent's own bomb.
            for r in range(11):
                for c in range(11):
                    if my_bomb_life_now[(r, c)] > 0:
                        cell = board_pre[(r, c)]
                        if cell == wood:
                            reward += 0.2
                            nothing = False
                            print_info('bomb -> wood', '+0.2')
                        if cell in enemies:
                            reward += 0.3
                            nothing = False
                            print_info('bomb -> enemy', '+0.3')
                        if cell in powerups:
                            reward -= 0.05
                            print_info('bomb -> powerup', '-0.05')
                        if cell == teamate:
                            reward -= 0.05
                            print_info('bomb -> teammate', '-0.05')
            # The bomb reaches neither wood nor an enemy.
            if nothing:
                reward -= 0.1
                print_info('Useless bomb', '-0.1')
    elif act_pre == 0:
        # The agent stood still although its goal was another cell.
        if obs_pre['position'] != goal_pre:
            reward -= 0.1
            print_info('Faultal goal', '-0.1')
    else:
        # The agent chose a move.
        # NOTE(review): nesting below was reconstructed from a source whose
        # indentation was lost -- confirm against the original layout.
        if position_now != position_pre:
            # Reward kicking a bomb.
            if obs_pre['can_kick']:
                if board_pre[goal_pre] == bomb:
                    reward += 0.01
                    print_info('Want to kick', '+0.01')
                if board_pre[position_now] == bomb:
                    reward += 0.2
                    print_info('Kick', '+0.2')
            # Walked from a safe cell into a blast zone.
            if (bomb_life_pre[position_pre] == 0
                    and bomb_life_now[position_now] > 0):
                reward -= 0.15
                print_info('Enter the explosion range', '-0.15')
            # Was inside a blast zone and is heading for a safe cell.
            if bomb_life_pre[position_pre] > 0 and bomb_life_pre[goal_pre] == 0:
                reward += 0.05
                print_info('Escape from the explosin range ', '+0.05')
            if board_pre[goal_pre] in powerups:
                # Heading for an item ...
                reward += 0.01
                print_info('Want a Item', '+0.01')
                # ... and actually picked one up.
                if board_pre[position_now] in powerups:
                    reward += 0.1
                    print_info('Eat a Item', '+0.1')
            elif board_pre[position_now] in powerups:
                # Picked an item up while heading somewhere else.
                reward += 0.05
                print_info('Passing by a Item', '+0.05')

    # We are playing a team game.
    timed_out = step_count >= max_steps

    if alive_agents in ([0, 2], [0], [2]):
        # Team [0, 2] wins.
        print_info('Wins and agent0 alive.', reward + 1)
        return reward + 1
    if alive_agents == [1, 3]:
        # Team [1, 3] wins and no enemy dead.
        print_info('Loss and no enemy dead.', reward - 1)
        return reward - 1
    if alive_agents in ([1], [3]):
        # Team [1, 3] wins and one enemy dead.
        print_info('Loss and one enemy dead.', reward - 0.6)
        return reward - 0.6
    if timed_out and alive_agents in ([0, 1], [0, 1, 2], [0, 3], [0, 2, 3]):
        # Tie and one enemy dead.
        print_info('Tie and one enemy dead.', reward - 0.6)
        return reward - 0.6
    if timed_out:
        # Game is over by max_steps. All agents tie.
        print_info('Game is over by max_steps. All agents tie.', reward - 1)
        return reward - 1
    if not alive_agents:
        # Everyone's dead. All agents tie.
        print_info('Everyone is dead. All agents tie.', reward)
        return reward
    # No team has yet won or lost.
    return reward
def get_rewards_v3_7(agents, step_count, max_steps, whole_obs_pre, whole_obs,
                     act_abs_pre):
    """Compute team-game rewards with a shaped reward for agent 0.

    The shaping term is derived from agent 0's previous observation, its
    current observation and the abstract action it chose. It is then
    combined with the game outcome, where agents 0 and 2 form one team and
    agents 1 and 3 the other.

    Args:
        agents: Sequence of agent objects exposing ``is_alive``.
        step_count: Number of steps played so far.
        max_steps: Step limit after which the game ends.
        whole_obs_pre: Per-agent observations before the action; only
            entry 0 is read.
        whole_obs: Per-agent observations after the action; only entry 0
            is read.
        act_abs_pre: Abstract action taken by agent 0.

    Returns:
        A list of four rewards. In decided games only the first entry
        carries the shaping term and the others are fixed +1/-1 values;
        otherwise all four entries are equal.
    """
    alive_agents = [num for num, agent in enumerate(agents) if agent.is_alive]

    # Work on copies so the feature helpers cannot touch the caller's data.
    obs_pre = copy.deepcopy(whole_obs_pre[0])
    obs_now = copy.deepcopy(whole_obs[0])

    position_pre = obs_pre['position']
    position_now = obs_now['position']

    bomb_life_now = feature_utils.get_bomb_life(obs_now)
    bomb_life_pre = feature_utils.get_bomb_life(obs_pre)
    my_bomb_life_now = feature_utils.get_my_bomb_life(bomb_life_now,
                                                      position_now)

    extrabomb = constants.Item.ExtraBomb.value
    kick = constants.Item.Kick.value
    incrrange = constants.Item.IncrRange.value
    bomb = constants.Item.Bomb.value
    wood = constants.Item.Wood.value
    agent1 = constants.Item.Agent1.value
    agent3 = constants.Item.Agent3.value
    agent2 = constants.Item.Agent2.value

    powerups = (extrabomb, kick, incrrange)
    enemies = (agent1, agent3)
    board_pre = obs_pre['board']

    reward = 0

    # The agent's own cell is about to blow up: count it as dying.
    if 0 < bomb_life_now[position_now] < 4:
        reward -= 1
        print_info('You dead', '-1')

    # Concrete action and target cell derived from the abstract action;
    # both are used only for the checks below.
    act_pre = feature_utils._djikstra_act(obs_pre, act_abs_pre)
    goal_pre = feature_utils.extra_goal(act_abs_pre, obs_pre)

    if act_pre == 5:
        # The agent tried to lay a bomb.
        if obs_pre['ammo'] == 0:
            # ... without any ammo.
            reward -= 0.1
            print_info('No ammo', '-0.1')
        else:
            nothing = True
            # Score every cell covered by the agent's own bomb.
            for r in range(11):
                for c in range(11):
                    if my_bomb_life_now[(r, c)] > 0:
                        cell = board_pre[(r, c)]
                        if cell == wood:
                            reward += 0.2
                            nothing = False
                            print_info('bomb -> wood', '+0.2')
                        if cell in enemies:
                            reward += 0.3
                            nothing = False
                            print_info('bomb -> enemy', '+0.3')
                        if cell in powerups:
                            reward -= 0.05
                            print_info('bomb -> powerup', '-0.05')
                        if cell == agent2:
                            reward -= 0.05
                            print_info('bomb -> teammate', '-0.05')
            # The bomb reaches neither wood nor an enemy.
            if nothing:
                reward -= 0.1
                print_info('Useless bomb', '-0.1')
    elif act_pre == 0:
        # The agent stood still although its goal was another cell.
        if obs_pre['position'] != goal_pre:
            reward -= 0.1
            print_info('Faultal goal', '-0.1')
    else:
        # The agent chose a move.
        # NOTE(review): nesting below was reconstructed from a source whose
        # indentation was lost -- confirm against the original layout.
        if position_now != position_pre:
            # Reward kicking a bomb.
            if obs_pre['can_kick']:
                if board_pre[goal_pre] == bomb:
                    reward += 0.01
                    print_info('Want to kick', '+0.01')
                if board_pre[position_now] == bomb:
                    reward += 0.2
                    print_info('Kick', '+0.2')
            # Walked from a safe cell into a blast zone.
            if (bomb_life_pre[position_pre] == 0
                    and bomb_life_now[position_now] > 0):
                reward -= 0.15
                print_info('Enter the explosion range', '-0.15')
            # Was inside a blast zone and is heading for a safe cell.
            if bomb_life_pre[position_pre] > 0 and bomb_life_pre[goal_pre] == 0:
                reward += 0.05
                print_info('Escape from the explosin range ', '+0.05')
            if board_pre[goal_pre] in powerups:
                # Heading for an item ...
                reward += 0.01
                print_info('Want a Item', '+0.01')
                # ... and actually picked one up.
                if board_pre[position_now] in powerups:
                    reward += 0.1
                    print_info('Eat a Item', '+0.1')
            elif board_pre[position_now] in powerups:
                # Picked an item up while heading somewhere else.
                reward += 0.05
                print_info('Passing by a Item', '+0.05')

    # We are playing a team game.
    timed_out = step_count >= max_steps

    if alive_agents in ([0, 2], [0], [2]):
        # Team [0, 2] wins.
        print_info('Wins and agent0 alive.', reward + 1)
        return [reward + 1, -1, 1, -1]
    if alive_agents == [1, 3]:
        # Team [1, 3] wins and no enemy dead.
        print_info('Loss and no enemy dead.', reward - 1)
        return [reward - 1, 1, -1, 1]
    if alive_agents in ([1], [3]):
        # Team [1, 3] wins and one enemy dead.
        print_info('Loss and one enemy dead.', reward - 0.6)
        return [reward - 0.6, 1, -1, 1]
    if timed_out and alive_agents in ([0, 1], [0, 1, 2], [0, 3], [0, 2, 3]):
        # Tie and one enemy dead.
        print_info('Tie and one enemy dead.', reward - 0.6)
        return [reward - 0.6, 1, -1, 1]
    if timed_out:
        # Game is over by max_steps. All agents tie.
        print_info('Game is over by max_steps. All agents tie.', reward - 1)
        return [reward - 1] * 4
    if not alive_agents:
        # Everyone's dead. All agents tie.
        print_info('Everyone is dead. All agents tie.', reward)
        return [reward] * 4
    # No team has yet won or lost.
    return [reward] * 4
def get_rewards_v3_6(agents, step_count, max_steps, whole_obs_pre, whole_obs,
                     act_abs_pre):
    """Compute team-game rewards with a shaped reward for agent 0.

    The shaping term is derived from agent 0's previous observation, its
    current observation and the abstract action it chose. It is then
    combined with the game outcome, where agents 0 and 2 form one team and
    agents 1 and 3 the other.

    Args:
        agents: Sequence of agent objects exposing ``is_alive``.
        step_count: Number of steps played so far.
        max_steps: Step limit after which the game ends.
        whole_obs_pre: Per-agent observations before the action; only
            entry 0 is read.
        whole_obs: Per-agent observations after the action; only entry 0
            is read.
        act_abs_pre: Abstract action taken by agent 0.

    Returns:
        A list of four rewards. In decided games only the first entry
        carries the shaping term and the others are fixed +1/-1 values;
        otherwise all four entries are equal.
    """
    alive_agents = [num for num, agent in enumerate(agents) if agent.is_alive]

    # Work on copies so the feature helpers cannot touch the caller's data.
    obs_pre = copy.deepcopy(whole_obs_pre[0])
    obs_now = copy.deepcopy(whole_obs[0])

    position_pre = obs_pre['position']
    position_now = obs_now['position']

    bomb_life_now = feature_utils.get_bomb_life(obs_now)
    bomb_life_pre = feature_utils.get_bomb_life(obs_pre)
    my_bomb_life_now = feature_utils.get_my_bomb_life(bomb_life_now,
                                                      position_now)

    extrabomb = constants.Item.ExtraBomb.value
    kick = constants.Item.Kick.value
    incrrange = constants.Item.IncrRange.value
    wood = constants.Item.Wood.value
    agent1 = constants.Item.Agent1.value
    agent3 = constants.Item.Agent3.value

    powerups = (extrabomb, kick, incrrange)
    enemies = (agent1, agent3)
    board_pre = obs_pre['board']

    reward = 0

    # The agent's own cell is about to blow up: count it as dying.
    if 0 < bomb_life_now[position_now] < 4:
        reward -= 0.5
        print_info('自己被炸死', '-0.5')

    # Concrete action and target cell derived from the abstract action;
    # both are used only for the checks below.
    act_pre = feature_utils._djikstra_act(obs_pre, act_abs_pre)
    goal_pre = feature_utils.extra_goal(act_abs_pre, obs_pre)

    if act_pre == 5:
        # The agent tried to lay a bomb.
        if obs_pre['ammo'] == 0:
            # ... without any ammo.
            reward -= 0.1
            print_info('没有ammo放炸弹', '-0.1')
        else:
            # Score every wood/enemy cell covered by the agent's own bomb.
            for r in range(11):
                for c in range(11):
                    if my_bomb_life_now[(r, c)] > 0:
                        cell = board_pre[(r, c)]
                        if cell == wood:
                            reward += 0.2
                            print_info('炸弹波及到wood', '+0.2')
                        if cell in enemies:
                            reward += 0.3
                            print_info('炸弹波及到敌人', '+0.3')
    elif act_pre == 0:
        # The agent stood still although its goal was another cell.
        if obs_pre['position'] != goal_pre:
            reward -= 0.01
            print_info('无效移动', '-0.01')
    else:
        # The agent chose a move.
        # Was inside a blast zone and is heading for a safe cell.
        if bomb_life_pre[position_pre] > 0 and bomb_life_pre[goal_pre] == 0:
            reward += 0.05
            print_info('被炸弹波及向着安全的位置移动', '+0.05')
        if board_pre[goal_pre] in powerups:
            # Heading for an item ...
            reward += 0.01
            print_info('向items移动', '+0.01')
            # ... and actually picked one up. The logged amount now matches
            # the 0.3 that is really added (the message used to say +0.2).
            if board_pre[position_now] in powerups:
                reward += 0.3
                print_info('向着item移动并吃到items', '+0.3')
        elif board_pre[position_now] in powerups:
            # Picked an item up while heading somewhere else.
            reward += 0.05
            print_info('路过吃到items', '+0.05')

    # We are playing a team game.
    timed_out = step_count >= max_steps

    if alive_agents in ([0, 2], [0], [2]):
        # Team [0, 2] wins.
        print_info('Team [0, 2] wins and agent0 alive.', reward + 1)
        return [reward + 1, -1, 1, -1]
    if alive_agents == [1, 3]:
        # Team [1, 3] wins and no enemy dead.
        print_info('Team [1, 3] wins and no enemy dead.', reward - 1)
        return [reward - 1, 1, -1, 1]
    if alive_agents in ([1], [3]):
        # Team [1, 3] wins and one enemy dead.
        print_info('Team [1, 3] wins and one enemy dead.', reward + 0.5)
        return [reward + 0.5, 1, -1, 1]
    if timed_out and alive_agents in ([0, 1], [0, 1, 2], [0, 3], [0, 2, 3]):
        # Tie and one enemy dead.
        print_info('tie and one enemy dead.', reward + 0.5)
        return [reward + 0.5, 1, -1, 1]
    if timed_out:
        # Game is over by max_steps. All agents tie.
        print_info('Game is over by max_steps. All agents tie.', reward - 1)
        return [reward - 1] * 4
    if not alive_agents:
        # Everyone's dead. All agents tie.
        print_info('Everyone is dead. All agents tie.', reward + 0.5)
        return [reward + 0.5] * 4
    # No team has yet won or lost.
    return [reward] * 4