Example #1
0
 def get_action(selected,
                state_info,
                hero,
                hero_name,
                rival_hero,
                revert=False):
     """Turn a model output index into the matching ``CmdAction``.

     Index layout, as handled below:
       * below 8 -- move along ``StateUtil.mov(selected, revert)``
       * 8       -- basic attack on the nearest enemy tower
       * 9       -- basic attack on ``rival_hero``
       * 10-17   -- basic attack on nearby enemy unit ``selected - 10``
       * 18-47   -- cast a skill; every run of ten indices maps to one
                    skill id, and the offset inside the run is handed to
                    ``LineModel.choose_skill_target``
       * 48      -- hold at the hero's current position
       * above   -- retreat to ``StateUtil.get_retreat_pos`` (line 1)

     ``selected`` is recorded as the action's ``output_index``.
     """
     # --- movement -------------------------------------------------------
     if selected < 8:
         direction = StateUtil.mov(selected, revert)
         destination = PosStateInfo(hero.pos.x + direction.x * 15,
                                    hero.pos.y + direction.y * 15,
                                    hero.pos.z + direction.z * 15)
         return CmdAction(hero_name, CmdActionEnum.MOVE, None, None,
                          destination, None, None, selected, None)

     # --- basic attack on tower / rival hero / one of the nearby creeps --
     if selected < 18:
         if selected == 8:
             nearest_tower = StateUtil.get_nearest_enemy_tower(
                 state_info, hero_name, StateUtil.ATTACK_UNIT_RADIUS)
             target_name = nearest_tower.unit_name
         elif selected == 9:
             target_name = rival_hero
         else:
             nearby_units = StateUtil.get_nearby_enemy_units(
                 state_info, hero_name)
             target_name = nearby_units[selected - 10].unit_name
         return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, target_name,
                          None, None, None, selected, None)

     # --- skill cast -----------------------------------------------------
     if selected < 48:
         skill_offset = selected - 18
         skillid = int(skill_offset / 10 + 1)
         tgtid, tgtpos = LineModel.choose_skill_target(
             skill_offset - (skillid - 1) * 10, state_info, skillid,
             hero_name, hero.pos, rival_hero)
         # A cast without a target position carries no direction either.
         cast_fwd = None if tgtpos is None else tgtpos.fwd(hero.pos)
         return CmdAction(hero_name, CmdActionEnum.CAST, skillid, tgtid,
                          tgtpos, cast_fwd, None, selected, None)

     # --- hold -----------------------------------------------------------
     if selected == 48:
         return CmdAction(hero_name, CmdActionEnum.HOLD, None, None,
                          hero.pos, None, None, 48, None)

     # --- retreat --------------------------------------------------------
     fallback_pos = StateUtil.get_retreat_pos(state_info,
                                              hero,
                                              line_index=1)
     return CmdAction(hero_name, CmdActionEnum.RETREAT, None, None,
                      fallback_pos, None, None, selected, None)
Example #2
0
 def keep_away_from(state_info, hero_info, rival_hero_info, action_ratios,
                    danger_pos, danger_radius):
     """Mask actions that would take the hero into a danger circle.

     Every entry of ``action_ratios`` whose action would end up within
     ``danger_radius`` of ``danger_pos`` is set to ``-1`` (entries that are
     already ``-1`` are skipped).  The list is modified in place and is
     also returned.

     Actions that are inspected:
       * 0-7: a move whose probe point (``0.5 * fwd`` ahead of the hero)
         lies inside the circle
       * 8: attacking the tower -- always masked
       * 9: attacking the rival hero while it stands inside the circle
       * 10-17: attacking nearby enemy unit ``selected - 10`` while it
         stands inside the circle
       * 28-37, only for a hero with ``cfg_id == '101'``: a skill whose
         chosen target position lies inside the circle

     An empty list, or a list whose maximum is ``-1``, is returned as is.
     """
     # Nothing to filter: no actions at all, or everything already masked.
     if len(action_ratios) == 0 or max(action_ratios) == -1:
         return action_ratios

     # Nearby enemy units do not change while we filter, so look them up at
     # most once and only if a creep-attack action is actually inspected.
     creeps = None

     for selected, ratio in enumerate(action_ratios):
         if ratio == -1:
             continue
         if selected < 8:
             fwd = StateUtil.mov(selected)
             tgtpos = PosStateInfo(hero_info.pos.x + fwd.x * 0.5,
                                   hero_info.pos.y + fwd.y * 0.5,
                                   hero_info.pos.z + fwd.z * 0.5)
             if StateUtil.cal_distance(tgtpos, danger_pos) <= danger_radius:
                 print('策略选择', state_info.battleid, hero_info.hero_name,
                       '移动方向会进入危险区域', hero_info.pos.to_string(),
                       tgtpos.to_string())
                 action_ratios[selected] = -1
         elif selected < 18:
             # Basic attacks on the tower, the rival hero and creeps 1-8;
             # the original note says this check targets melee heroes.
             if selected == 8:  # enemy tower
                 print('策略选择', state_info.battleid, hero_info.hero_name,
                       '不要去攻击塔')
                 action_ratios[selected] = -1
             elif selected == 9:  # enemy hero
                 if StateUtil.cal_distance(rival_hero_info.pos,
                                           danger_pos) <= danger_radius:
                     print('策略选择', state_info.battleid, hero_info.hero_name,
                           '不要去近身攻击塔范围内的英雄')
                     action_ratios[selected] = -1
             else:  # creep
                 if creeps is None:
                     creeps = StateUtil.get_nearby_enemy_units(
                         state_info, hero_info.hero_name)
                 tgt = creeps[selected - 10]
                 if StateUtil.cal_distance(tgt.pos,
                                           danger_pos) <= danger_radius:
                     print('策略选择', state_info.battleid, hero_info.hero_name,
                           '不要去近身攻击塔范围内的小兵')
                     action_ratios[selected] = -1
         elif hero_info.cfg_id == '101' and 28 <= selected < 38:
             # Special case for hero cfg '101' (Charles per the original
             # note): its jump skill moves the hero to the target position.
             skillid = int((selected - 18) / 10 + 1)
             _tgtid, tgtpos = LineModel.choose_skill_target(
                 selected - 18 - (skillid - 1) * 10, state_info, skillid,
                 hero_info.hero_name, hero_info.pos,
                 rival_hero_info.hero_name)
             if tgtpos is not None:
                 if StateUtil.cal_distance(tgtpos,
                                           danger_pos) <= danger_radius:
                     print('策略选择', state_info.battleid, hero_info.hero_name,
                           '跳跃技能在朝着塔下的目标')
                     action_ratios[selected] = -1
     return action_ratios
Example #3
0
 def __init__(self, hero_name, action, skillid, tgtid, tgtpos, fwd, itemid, output_index, reward, vpred=0, avail_action=True):
     """Store one hero command together with its training metadata.

     ``skillid`` and ``tgtid`` are kept as strings (``None`` therefore
     becomes ``'None'``).  A non-None ``tgtpos`` is copied into a new
     ``PosStateInfo`` whose coordinates are truncated to integers.
     ``gold`` and ``lv`` start at 0.
     """
     self.hero_name, self.action = hero_name, action
     self.skillid, self.tgtid = str(skillid), str(tgtid)

     # The target position must be made of integers.
     if tgtpos is None:
         self.tgtpos = None
     else:
         self.tgtpos = PosStateInfo(int(tgtpos.x), int(tgtpos.y), int(tgtpos.z))

     self.fwd, self.itemid = fwd, itemid
     self.output_index, self.reward = output_index, reward
     self.vpred = vpred  # value prediction, used by PPO
     self.avail_action = avail_action
     self.gold = self.lv = 0
Example #4
0
 def set_move_target(hero_info, fwd, time_second=0.5):
     """Return the position to aim for when moving along ``fwd``.

     The x/z offset is ``time_second * fwd * hero_info.speed / 1000 * 3``
     added to the hero's current position; y is fixed at -80.
     """
     offset_x = time_second * fwd.x * hero_info.speed / 1000 * 3
     offset_z = time_second * fwd.z * hero_info.speed / 1000 * 3
     current = hero_info.pos
     return PosStateInfo(current.x + offset_x, -80, current.z + offset_z)
Example #5
0
 def decode(obj, unit_name):
     """Build a ``UnitStateInfo`` for ``unit_name`` from a decoded mapping.

     Keys missing from ``obj`` yield ``None`` for the matching field.
     ``pos`` and ``fwd`` are decoded through ``PosStateInfo.decode`` /
     ``FwdStateInfo.decode``.  When ``obj`` carries no ``team`` key the
     team is derived from the position: 0 for ``pos.x < 0``, otherwise 1,
     and ``None`` when there is no position.
     """
     read = obj.get  # absent key -> None

     pos = None
     if 'pos' in obj:
         pos = PosStateInfo.decode(obj['pos'])
     fwd = None
     if 'fwd' in obj:
         fwd = FwdStateInfo.decode(obj['fwd'])

     if 'team' in obj:
         team = obj['team']
     elif pos is None:
         team = None
     elif pos.x < 0:
         team = 0
     else:
         team = 1

     return UnitStateInfo(
         unit_name, read('state'), read('cfgID'), pos, fwd,
         read('hp'), read('maxhp'), read('speed'), read('moving'),
         read('chrtype'), read('att'), read('attspeed'), read('mag'),
         read('attpen'), read('magpen'), read('attpenrate'),
         read('magpenrate'), read('movelock'),
         read('vis1'), read('vis2'), read('vis3'), team)
Example #6
0
 def cal_soldier_wave_point(state_info, unit_index_list):
     """Return the average x/z position of the named units.

     Each name in ``unit_index_list`` is resolved through
     ``state_info.get_unit``.  The result is a ``PosStateInfo`` whose x and
     z are the integer-truncated means of the units' coordinates and whose
     y is fixed at -80.

     Returns ``None`` when ``unit_index_list`` is empty, because a mean
     over no units is undefined (this used to raise ZeroDivisionError).
     """
     if not unit_index_list:
         return None

     positions = [state_info.get_unit(unit_name).pos
                  for unit_name in unit_index_list]
     count = len(positions)
     mean_x = sum(pos.x for pos in positions) / count
     mean_z = sum(pos.z for pos in positions) / count
     return PosStateInfo(int(mean_x), int(-80), int(mean_z))
Example #7
0
 def get_hp_restore_place(state_info, hero):
     """Return a spot next to the hero's own unit located at x = +/-17110.

     The first unit of the hero's team whose ``pos.x`` is 17110 or -17110
     is used; the returned position keeps that unit's y and z and shifts x
     by 3000 (subtracted for team 0, added otherwise).  Returns ``None``
     when no such unit exists.
     """
     anchor_xs = (17110, -17110)
     for candidate in state_info.units:
         if candidate.team != hero.team:
             continue
         if candidate.pos.x not in anchor_xs:
             continue
         # Step to the far side of the structure (the original note calls
         # it a tower).
         if hero.team == 0:
             shifted_x = candidate.pos.x - 3000
         else:
             shifted_x = candidate.pos.x + 3000
         return PosStateInfo(shifted_x, candidate.pos.y, candidate.pos.z)
     return None
Example #8
0
 def decode(obj):
     """Rebuild a ``CmdAction`` from its decoded mapping form.

     ``hero_name`` and ``action`` are required keys; every other key is
     optional and defaults to ``None`` (including ``vpred``).  ``tgtpos``
     and ``fwd`` are decoded through their own ``decode`` helpers.
     """
     hero_name = obj['hero_name']
     action = obj['action']

     tgtpos = None
     if 'tgtpos' in obj:
         tgtpos = PosStateInfo.decode(obj['tgtpos'])
     fwd = None
     if 'fwd' in obj:
         fwd = FwdStateInfo.decode(obj['fwd'])

     optional = obj.get  # absent key -> None
     return CmdAction(hero_name, action, optional('skillid'),
                      optional('tgtid'), tgtpos, fwd, optional('itemid'),
                      optional('output_index'), optional('reward'),
                      optional('vpred'))
Example #9
0
 def policy_move_retreat(hero_info):
     """Build a MOVE command that walks the hero in its retreat direction.

     Team 0 uses move index 6, every other team uses index 0; the target
     lies 15 units along ``StateUtil.mov(index)`` from the hero's position.
     The move index is also stored as the action's ``output_index``.
     """
     mov_idx = 6 if hero_info.team == 0 else 0
     direction = StateUtil.mov(mov_idx)
     origin = hero_info.pos
     destination = PosStateInfo(origin.x + direction.x * 15,
                                origin.y + direction.y * 15,
                                origin.z + direction.z * 15)
     return CmdAction(hero_info.hero_name, CmdActionEnum.MOVE, None, None,
                      destination, None, None, mov_idx, None)
Example #10
0
 def get_attack_tower_action(hero_name, hero_info, tower_unit):
     """Return the command that makes the hero attack ``tower_unit``.

     The model detects towers from further away than the hero can hit
     them, so when the tower is beyond ``StateUtil.ATTACK_UNIT_RADIUS`` a
     MOVE towards it is issued first.  The move is expressed through a
     target position rather than ``fwd`` because, per the original note,
     the ``fwd`` coordinate system of the move command is odd.
     """
     in_reach = StateUtil.cal_distance(
         hero_info.pos, tower_unit.pos) <= StateUtil.ATTACK_UNIT_RADIUS
     if in_reach:
         # Close enough: attack directly (recorded with output index 11).
         return CmdAction(hero_name, CmdActionEnum.ATTACK, 0,
                          tower_unit.unit_name, None, None, None,
                          11, None)

     # Snap the exact direction to the closest discrete move direction.
     exact_fwd = tower_unit.pos.fwd(hero_info.pos)
     fwd, output_index = Replayer.get_closest_fwd(exact_fwd)
     destination = PosStateInfo(hero_info.pos.x + fwd.x * 15,
                                hero_info.pos.y + fwd.y * 15,
                                hero_info.pos.z + fwd.z * 15)
     print("朝塔移动,", hero_name, "hero_pos", hero_info.pos.to_string(),
           "tower_pos", tower_unit.pos.to_string(), "fwd",
           fwd.to_string(), "output_index", output_index)
     return CmdAction(hero_name, CmdActionEnum.MOVE, None, None,
                      destination, None, None, output_index, None)
Example #11
0
class CmdAction(object):
    """A single command for one hero plus the training data attached to it.

    Instances can be converted to and from plain mappings with
    ``encode`` / ``decode``.
    """

    def __init__(self, hero_name, action, skillid, tgtid, tgtpos, fwd, itemid, output_index, reward, vpred=0, avail_action=True):
        """Store the command fields.

        ``skillid`` and ``tgtid`` are kept as strings (``None`` becomes
        ``'None'``); a non-None ``tgtpos`` is copied into a new
        ``PosStateInfo`` with integer-truncated coordinates.
        """
        self.hero_name, self.action = hero_name, action
        self.skillid, self.tgtid = str(skillid), str(tgtid)

        # The target position must be made of integers.
        if tgtpos is None:
            self.tgtpos = None
        else:
            self.tgtpos = PosStateInfo(int(tgtpos.x), int(tgtpos.y), int(tgtpos.z))

        self.fwd, self.itemid = fwd, itemid
        self.output_index, self.reward = output_index, reward
        self.vpred = vpred  # value prediction, used by PPO
        self.avail_action = avail_action
        self.gold = self.lv = 0

    @staticmethod
    def decode(obj):
        """Rebuild a ``CmdAction`` from the mapping produced by ``encode``.

        ``hero_name`` and ``action`` are required; all other keys are
        optional and default to ``None`` (including ``vpred``).
        """
        hero_name = obj['hero_name']
        action = obj['action']

        tgtpos = None
        if 'tgtpos' in obj:
            tgtpos = PosStateInfo.decode(obj['tgtpos'])
        fwd = None
        if 'fwd' in obj:
            fwd = FwdStateInfo.decode(obj['fwd'])

        optional = obj.get  # absent key -> None
        return CmdAction(hero_name, action, optional('skillid'),
                         optional('tgtid'), tgtpos, fwd, optional('itemid'),
                         optional('output_index'), optional('reward'),
                         optional('vpred'))

    def encode(self):
        """Return the command as a dict, leaving out every ``None`` value.

        ``tgtpos`` and ``fwd`` are included (in encoded form) only when set.
        ``avail_action``, ``gold`` and ``lv`` are not part of the output.
        """
        pairs = [
            ('hero_name', self.hero_name),
            ('action', self.action),
            ('skillid', self.skillid),
            ('tgtid', self.tgtid),
            ('itemid', self.itemid),
            ('output_index', self.output_index),
            ('reward', self.reward),
            ('vpred', self.vpred),
        ]
        if self.tgtpos is not None:
            pairs.append(('tgtpos', self.tgtpos.encode()))
        if self.fwd is not None:
            pairs.append(('fwd', self.fwd.encode()))
        return {key: value for key, value in pairs if value is not None}
Example #12
0
 def get_retreat_pos(state_info, hero, line_index):
     """Return the position the hero should fall back to.

     Candidates are the hero's own towers on line ``line_index`` that lie
     behind the hero (smaller x for team 0, larger x for team 1).  The one
     closest in x is chosen and the result is a point offset from it by
     3000 in x and 2000 in z (subtracted for team 0, added otherwise),
     keeping the tower's y.  Without any candidate the team's basement
     position from ``StateUtil`` is returned.
     """
     towers_behind = []
     for unit in state_info.units:
         own_tower = StateUtil.if_unit_tower(
             unit.unit_name) and unit.team == hero.team
         if not own_tower:
             continue
         if not (StateUtil.if_in_line(unit, line_index) >= 0):
             continue
         # Keep only towers located behind the hero.
         behind_team0 = hero.team == 0 and hero.pos.x > unit.pos.x
         if behind_team0 or (hero.team == 1 and hero.pos.x < unit.pos.x):
             towers_behind.append(unit)

     if len(towers_behind) == 0:
         if hero.team == 1:
             return StateUtil.BASEMENT_TEAM_1
         return StateUtil.BASEMENT_TEAM_0

     closest = min(towers_behind,
                   key=lambda t: math.fabs(hero.pos.x - t.pos.x))
     # Stand on the far side of that tower.
     if hero.team == 0:
         target_x = closest.pos.x - 3000
         target_z = closest.pos.z - 2000
     else:
         target_x = closest.pos.x + 3000
         target_z = closest.pos.z + 2000
     return PosStateInfo(target_x, closest.pos.y, target_z)
Example #13
0
 def play_move(hero_info, fwd, time_second=0.5):
     """Return the position reached by moving along ``fwd``.

     The x/z offset is ``time_second * fwd * hero_info.speed / 1000 * 1.2``
     added to the hero's current position; y is fixed at -80.
     """
     offset_x = time_second * fwd.x * hero_info.speed / 1000 * 1.2
     offset_z = time_second * fwd.z * hero_info.speed / 1000 * 1.2
     current = hero_info.pos
     return PosStateInfo(current.x + offset_x, -80, current.z + offset_z)
Example #14
0
    def decode(obj, hero_name):
        """Build a ``HeroStateInfo`` for ``hero_name`` from a decoded mapping.

        Scalar fields missing from ``obj`` become ``None`` (``buff``
        defaults to an empty list).  Equipment slots ``equip0``..``equip7``
        and skills ``Skill0``..``Skill8`` are collected in order, skipping
        the ones that are absent.
        """
        read = obj.get  # absent key -> None

        pos = None
        if 'pos' in obj:
            pos = PosStateInfo.decode(obj['pos'])
        fwd = None
        if 'fwd' in obj:
            fwd = FwdStateInfo.decode(obj['fwd'])

        # TODO: should mp default to 0 when the frame carries no value?
        mp = read('mp')

        # Equipment slots that are present, in slot order.
        equips = []
        for slot in range(8):
            slot_key = 'equip%d' % slot
            if slot_key in obj:
                equips.append(EquipStateInfo.decode(obj[slot_key], slot_key))

        buffs = obj.get('buff', [])

        # Skills that could be decoded, in skill order.
        skills = []
        for skill_no in range(9):
            skill_info = HeroStateInfo.decode_add_skill(
                obj, 'Skill%d' % skill_no)
            if skill_info is not None:
                skills.append(skill_info)

        # Without an explicit team, derive it from the position.  Per the
        # original note this is only sound on the first frame (start
        # position); later frames should take the team from the merged
        # state.  The original note maps team 0 to the top lane and team 1
        # to the bottom lane.
        if 'team' in obj:
            team = obj['team']
        elif pos is None:
            team = None
        elif pos.x < 0:
            team = 0
        else:
            team = 1

        # vis1..vis3 are visibility flags (per the original note: visible
        # to the bottom-lane side, the top-lane side and neutral creatures).
        return HeroStateInfo(hero_name, read('state'), read('cfgID'), pos, fwd,
                             read('hp'), read('maxhp'), mp, read('maxmp'),
                             read('speed'), read('att'), read('gold'),
                             read('Hprec'), equips, buffs, skills,
                             read('vis1'), read('vis2'), read('vis3'),
                             read('attspeed'), read('mag'), read('attpen'),
                             read('magpen'), read('attpenrate'),
                             read('magpenrate'), read('movelock'), team)
Example #15
0
 def get_tower_behind(tower_info, hero, line_index):
     """Return the point 4000 units behind ``tower_info`` along x.

     "Behind" means a smaller x for team 0 and a larger x for any other
     team; y and z are taken from the tower.  ``line_index`` is accepted
     but not used.
     """
     shift = -4000 if hero.team == 0 else 4000
     tower_pos = tower_info.pos
     return PosStateInfo(tower_pos.x + shift, tower_pos.y, tower_pos.z)
Example #16
0
class TeamBattleTrainer:

    # x/z coordinates of the team-fight point; heroes outside the battle
    # range are sent to positions computed relative to BATTLE_POINT_Z.
    BATTLE_POINT_X = 0
    BATTLE_POINT_Z = -31000
    BATTLE_CIRCLE = PosStateInfo(BATTLE_POINT_X, 0, BATTLE_POINT_Z)
    # Battle range reported by cal_battle_range() before the fight starts;
    # build_response() also multiplies it by 1000 to place the gathering
    # points along z.
    BATTLE_CIRCLE_RADIUS_BATTLE_START = 8
    # Battle range at the moment the fight starts; cal_battle_range() then
    # subtracts int(action_times / SHRINK_TIME) from it.
    BATTLE_CIRCLE_RADIUS_BATTLE_ING = 10
    # Number of cached states after which the range shrinks by one.
    SHRINK_TIME = 60

    def __init__(self, act_size, save_root_path, battle_id, model_util, gamma,
                 enable_policy):
        """Set up the per-battle state and open the raw log file.

        The raw log is written to ``<save_root_path>/raw_<battle_id>.log``
        and stays open for the lifetime of the trainer.  One
        ``TEAM_PPO_CACHE(gamma)`` is created for each of the ten hero
        names '27'..'36'.
        """
        self.act_size = act_size
        self.battle_id = battle_id
        self.model_util = model_util
        self.state_cache = []
        self.heros = [str(hero_id) for hero_id in range(27, 37)]

        log_path = save_root_path + '/raw_' + str(battle_id) + '.log'
        self.raw_log_file = open(log_path, 'w')

        self.dead_heroes = []
        self.battle_started = -1
        self.model_caches = {
            hero: TEAM_PPO_CACHE(gamma) for hero in self.heros
        }
        self.rebooting = False
        self.enable_policy = enable_policy

        # Rewards are computed from past frames, so the participation and
        # death status of those frames is cached alongside the model
        # inputs.  A way to avoid this caching could be looked into later.
        self.battle_heroes_cache = []
        self.dead_heroes_cache = []
        self.data_inputs = []

    def save_raw_log(self, raw_log_str):
        """Append ``raw_log_str`` to the raw log with a GMT timestamp.

        The line has the form ``YYYY-mm-dd HH:MM:SS -- <raw_log_str>`` and
        the file is flushed after every write.
        """
        timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
        log_line = timestamp + " -- " + raw_log_str + "\n"
        log_file = self.raw_log_file
        log_file.write(log_line)
        log_file.flush()

    def build_response(self, raw_state_str):
        """Process one state frame from the game client and build the reply.

        The raw frame is logged, decoded and merged with the previous cached
        state.  Depending on the phase the reply contains preparation
        commands (gold, levels, skill upgrades, equipment), MOVE commands
        that gather heroes at the team-fight point, model-chosen actions
        for the two teams, or a RESTART command.

        Returns the reply as a JSON string with the keys ``ID``, ``tick``
        and ``cmd``.
        NOTE(review): the ``tick == -1`` branch returns a plain dict (without
        ``cmd``) instead of a JSON string -- confirm the caller copes with
        both.
        """
        self.save_raw_log(raw_state_str)
        prev_state_info = self.state_cache[-1] if len(
            self.state_cache) > 0 else None
        response_strs = []

        # Parse the request sent by the client.
        obj = JSON.loads(raw_state_str)
        raw_state_info = StateInfo.decode(obj)

        # On a restart the client sends a message such as
        # {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}
        if raw_state_info.tick == -1:
            return {"ID": raw_state_info.battleid, "tick": -1}

        if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (
                prev_state_info is None
                or prev_state_info.tick > raw_state_info.tick):
            # Opening frame of a (new) battle: reset all per-battle state.
            print("clear")
            prev_state_info = None
            self.state_cache = []
            self.battle_started = -1
            self.battle_heroes_cache = []
            self.dead_heroes = []
            self.dead_heroes_cache = []
            self.data_inputs = []
            self.rebooting = False
        elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
            # Not an opening frame: reply with a restart command right away.
            # Occasionally the first frame arrives without a tick (i.e. -1);
            # in that case the only option is to restart this battle.
            print("battle_id", self.battle_id, "tick", raw_state_info.tick,
                  '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
            action_strs = [
                StateUtil.build_action_command('27', 'RESTART', None)
            ]
            rsp_obj = {
                "ID": raw_state_info.battleid,
                "tick": raw_state_info.tick,
                "cmd": action_strs
            }
            rsp_str = JSON.dumps(rsp_obj)
            return rsp_str

        state_info = StateUtil.update_state_log(prev_state_info,
                                                raw_state_info)
        hero = state_info.get_hero("27")

        if hero is None or hero.hp is None:
            # Rare case: hero '27' cannot be found (or has no hp), so
            # restart directly.
            print("battle_id", self.battle_id, "tick", state_info.tick,
                  '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
            action_strs = [
                StateUtil.build_action_command('27', 'RESTART', None)
            ]
            rsp_obj = {
                "ID": raw_state_info.battleid,
                "tick": raw_state_info.tick,
                "cmd": action_strs
            }
            rsp_str = JSON.dumps(rsp_obj)
            return rsp_str

        # Pre-battle preparation.  From here on ``hero`` is reused as a loop
        # variable holding hero names.
        if len(self.state_cache) == 0:
            # First frame: grant gold and levels.
            for hero in self.heros:
                add_gold_cmd = CmdAction(hero, CmdActionEnum.ADDGOLD, None,
                                         None, None, None, None, None, None)
                add_gold_cmd.gold = 3000
                add_gold_str = StateUtil.build_command(add_gold_cmd)
                response_strs.append(add_gold_str)

                add_lv_cmd = CmdAction(hero, CmdActionEnum.ADDLV, None, None,
                                       None, None, None, None, None)
                add_lv_cmd.lv = 9
                add_lv_str = StateUtil.build_command(add_lv_cmd)
                response_strs.append(add_lv_str)
        elif len(self.state_cache) > 1:
            # Upgrade skills and buy equipment; this may go on for several
            # frames.  The original note says "from the second frame on".
            # NOTE(review): the condition is ``> 1``, so the frame on which
            # the cache holds exactly one state does neither -- confirm
            # this is intended.
            for hero in self.heros:
                upgrade_cmd = self.upgrade_skills(state_info, hero)
                if upgrade_cmd is not None:
                    response_strs.append(upgrade_cmd)

                buy_cmd = self.buy_equip(state_info, hero)
                if buy_cmd is not None:
                    response_strs.append(buy_cmd)

        for hero in self.heros:
            # Check whether the hero has died since the previous frame.
            if prev_state_info is not None:
                dead = StateUtil.if_hero_dead(prev_state_info, state_info,
                                              hero)
                if dead == 1 and hero not in self.dead_heroes:
                    print("battle_id", self.battle_id, "tick", state_info.tick,
                          "英雄死亡", hero, "tick", state_info.tick)
                    self.dead_heroes.append(hero)

        # All heroes are first required to stand inside the team-fight
        # circle; only then does model inference start, and from that point
        # every action is decided by the model.
        # Invalid actions have to be filtered out and moves that would leave
        # the battle circle have to be masked.
        # TODO: once the fight has started, pull a hero back if a skill
        # occasionally moves it out of the circle.

        # Dead heroes are excluded here; they do not rejoin the fight.
        # The battle range shrinks over time.
        battle_range = self.cal_battle_range(
            len(self.state_cache) - self.battle_started)
        heroes_in_range, heroes_out_range = TeamBattleTrainer.all_in_battle_range(
            state_info, self.heros, self.dead_heroes, battle_range)

        # Surviving heroes: those inside the range followed by those outside.
        battle_heros = list(heroes_in_range)
        battle_heros.extend(heroes_out_range)

        # Cache participation and death status for later training.
        self.battle_heroes_cache.append(battle_heros)
        self.dead_heroes_cache.append(list(self.dead_heroes))

        # Debugging hook; ``debuginfo`` is not read anywhere in this method.
        if state_info.tick >= 142560:
            debuginfo = True

        # The team fight has not started yet: some heroes are still outside
        # the circle.
        if len(heroes_out_range) > 0:
            if self.battle_started > -1:
                print('battle_id', self.battle_id, "战斗已经开始,但是为什么还有英雄在团战圈外",
                      ','.join(heroes_out_range), "battle_range", battle_range)

            # Move towards one of the two starting areas; once the fight
            # has started, move towards the team-fight centre instead.
            for hero in heroes_out_range:
                start_point_x = randint(0, 8000)
                start_point_z = TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START * 1000 if self.battle_started == -1 else 0
                start_point_z += randint(-4000, 4000)
                # Team 0 gathers on the opposite side along z.
                if TeamBattleUtil.get_hero_team(hero) == 0:
                    start_point_z *= -1
                start_point_z += TeamBattleTrainer.BATTLE_POINT_Z
                tgt_pos = PosStateInfo(start_point_x, 0, start_point_z)
                move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None,
                                        tgt_pos, None, None, None, None)
                mov_cmd_str = StateUtil.build_command(move_action)
                response_strs.append(mov_cmd_str)
        # The team fight is running.
        elif not self.rebooting:
            if self.battle_started == -1:
                # Remember the cache index at which the fight began.
                self.battle_started = len(self.state_cache)

            # Special cases, e.g. Dracula's ultimate sets hp to 1: patch
            # the frame state accordingly.
            state_info, _ = TeamBattlePolicy.modify_status_4_draculas_invincible(
                state_info, self.state_cache)

            # Split the heroes in range into teams and get the actions of
            # each team.
            team_a, team_b = TeamBattleUtil.get_teams(heroes_in_range)
            team_actions_a, input_list_a, model_upgrade_a = self.get_model_actions_team(
                state_info, team_a, heroes_in_range)
            team_actions_b, input_list_b, model_upgrade_b = self.get_model_actions_team(
                state_info, team_b, heroes_in_range)

            # If either call reported a model upgrade, restart the battle.
            if (model_upgrade_a or model_upgrade_b
                ) and self.battle_started < len(self.state_cache) + 1:
                print("battle_id", self.battle_id, "因为模型升级,重启战斗",
                      self.battle_started, len(self.state_cache))
                action_strs = [
                    StateUtil.build_action_command('27', 'RESTART', None)
                ]
                rsp_obj = {
                    "ID": raw_state_info.battleid,
                    "tick": raw_state_info.tick,
                    "cmd": action_strs
                }
                rsp_str = JSON.dumps(rsp_obj)
                return rsp_str
            data_input_map = {}
            for action_cmd, data_input in zip(team_actions_a + team_actions_b,
                                              input_list_a + input_list_b):
                action_str = StateUtil.build_command(action_cmd)
                response_strs.append(action_str)
                state_info.add_action(action_cmd)
                data_input_map[action_cmd.hero_name] = data_input

            # Cache all model inputs for later training.
            self.data_inputs.append(data_input_map)

        # Append this frame to the state cache.
        self.state_cache.append(state_info)

        # Add the model actions to the training cache and compute rewards.
        # Note: rewards depend on the following state, so this computation
        # lags behind by one frame.
        last_x_index = 2
        if self.battle_started > -1 and len(self.data_inputs) >= last_x_index:
            if self.rebooting:
                # Testing showed that after a restart command has been sent
                # the next frame may arrive before the battle has actually
                # restarted; training is discarded in that case.
                print("battle_id", self.battle_id, "tick", state_info.tick,
                      "warn", "要求重启战斗,但是还在收到后续帧状态, 继续重启")

                # Ask for the restart again (replaces all other commands).
                response_strs = [
                    StateUtil.build_action_command('27', 'RESTART', None)
                ]
            else:
                state_index = len(self.state_cache) - last_x_index
                win, win_team, left_heroes = self.remember_replay_heroes(
                    -last_x_index, state_index, battle_range)

                # End of the team fight is decided by remember_replay_heroes.
                if win == 1:
                    # Restart the game (replaces all other commands).
                    print('battle_id', self.battle_id, "重启游戏", "剩余人员",
                          ','.join(left_heroes))
                    response_strs = [
                        StateUtil.build_action_command('27', 'RESTART', None)
                    ]
                    self.rebooting = True

        # TODO: record the model outputs for later training.

        # Return the result to the game client.
        rsp_obj = {
            "ID": state_info.battleid,
            "tick": state_info.tick,
            "cmd": response_strs
        }
        rsp_str = JSON.dumps(rsp_obj)
        print('battle_id', self.battle_id, 'response', rsp_str)
        return rsp_str

    def cal_battle_range(self, action_times):
        """Return the current battle range.

        Before the fight starts (``battle_started == -1``) this is
        ``BATTLE_CIRCLE_RADIUS_BATTLE_START``.  Afterwards it is
        ``BATTLE_CIRCLE_RADIUS_BATTLE_ING`` reduced by one for every
        ``SHRINK_TIME`` units of ``action_times``.
        """
        if self.battle_started == -1:
            return TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START
        shrink_steps = int(action_times / TeamBattleTrainer.SHRINK_TIME)
        return TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_ING - shrink_steps

    # last_x_index 表示这是倒数第x个状态,这里不用准确数字而是用-1、-2是因为state_cache,data_inputs长度不同
    # state_index 表示状态在帧缓存中的位置,用于计算奖励值折旧时候使用
    def remember_replay_heroes(self, last_x_index, state_index, battle_range):
        """Compute rewards for one cached frame and store it as training data.

        Args:
            last_x_index: negative offset into the per-frame caches (e.g.
                ``-2`` for the second-to-last frame).  An offset is used
                instead of an absolute index because ``state_cache`` and
                ``data_inputs`` have different lengths.
            state_index: position of the frame inside ``state_cache``; it is
                forwarded to the per-hero training cache.
            battle_range: current battle-circle radius; a value ``<= 0``
                while nobody has won forces the battle to end.

        Returns:
            ``(win, win_team, left_heroes)`` as reported by
            ``model_util.cal_rewards`` (``win`` is forced to ``1`` when the
            battle ran out of time).
        """
        frames = self.state_cache
        prev_state = frames[last_x_index - 1]
        state_info = frames[last_x_index]
        next_state = frames[last_x_index + 1]
        battle_heroes = self.battle_heroes_cache[last_x_index]
        dead_heroes = self.dead_heroes_cache[last_x_index]
        inputs_by_hero = self.data_inputs[last_x_index]

        # Reward calculation needs the neighbouring frames as well.
        reward_result = self.model_util.cal_rewards(
            prev_state, state_info, next_state, battle_heroes, dead_heroes)
        state_info, win, win_team, left_heroes = reward_result
        print("battle_id", self.battle_id, "tick", state_info.tick,
              "remember_replay_heroes", "win", win, "剩余人员",
              ','.join(left_heroes), "输入—战斗人员", ','.join(battle_heroes),
              "输入—阵亡人员", ','.join(dead_heroes))

        # A battle has a maximum duration: once the circle has shrunk to
        # nothing without a winner, the battle is treated as finished.
        timed_out = win == 0 and battle_range <= 0
        if timed_out:
            print('battle_id', self.battle_id, "到达游戏最大时长,直接重启,需要确认是否有异常情况")
            win = 1

        for action in state_info.actions:
            # An EMPTY action carries no model input (e.g. the hero is
            # already dead but its last action still affects later frames).
            if action.action != CmdActionEnum.EMPTY:
                data_input = inputs_by_hero[action.hero_name]
            else:
                data_input = None
            self.remember_train_data(state_info, state_index, data_input,
                                     action.hero_name, win)

        if win != 1:
            return win, win_team, left_heroes

        # The battle is over: hand every hero's cached replay to the trainer.
        for hero_name in self.heros:
            model_cache = self.model_caches[hero_name]
            o4r, batch_size = model_cache.output4replay()
            print('battle_id', self.battle_id, 'trainer', hero_name,
                  '添加训练集', batch_size)
            if o4r is None:
                print('battle_id', self.battle_id, "训练数据异常")
                continue
            self.model_util.set_train_data(hero_name, self.battle_id, o4r,
                                           batch_size)
            model_cache.clear_cache()
        return win, win_team, left_heroes

    # 保存训练数据,计算行为奖励,触发训练
    #TODO 在游戏重启时候需要同时训练所有的模型
    def remember_train_data(self, state_info, state_index, data_input,
                            hero_name, new):
        """Store one hero's step (observation, action, reward) for training.

        Nothing is stored when the frame holds no action for the hero, or
        when the action has no reward yet (an error line is printed in the
        latter case).

        Args:
            state_info: frame the action belongs to.
            state_index: index of that frame in the state cache.
            data_input: model input that produced the action (may be None).
            hero_name: hero whose action is recorded.
            new: episode flag passed through to the cache unchanged.
        """
        hero_act = state_info.get_hero_action(hero_name)
        model_cache = self.model_caches[hero_name]

        if hero_act is None:
            return
        if hero_act.reward is None:
            print("Error", 'battle_id', self.battle_id, hero_act.hero_name,
                  hero_act.action, hero_act.skillid)
            return

        # prev_new is derived in a simple way by the cache; the original
        # author flagged that this may not always be right.
        prev_new = model_cache.get_prev_new()
        model_cache.remember(data_input, hero_act.output_index,
                             hero_act.vpred, new, hero_act.reward, prev_new,
                             state_index, self.battle_id, hero_name)

    @staticmethod
    def all_in_battle_range(state_info, all_heroes, dead_heroes, battle_range):
        heroes_in = []
        heroes_out = []
        for hero in all_heroes:
            if hero not in dead_heroes:
                hero_info = state_info.get_hero(hero)
                dis = TeamBattleTrainer.in_battle_range(
                    hero_info.pos, battle_range)
                if dis != -1:
                    heroes_out.append(hero)
                    # print('battle_id', state_info.battleid, "all_in_battle_range", "found hero not in circle", hero, "battle_range", battle_range, "distance", dis)
                else:
                    heroes_in.append(hero)
        return heroes_in, heroes_out

    # 考察一个英雄是否在团战圈中
    @staticmethod
    def in_battle_range(pos, battle_range):
        """Tell whether ``pos`` lies inside the battle circle.

        The value of ``StateUtil.cal_distance2`` between ``pos`` and
        ``BATTLE_CIRCLE`` is compared with ``battle_range * 1000 + 500``.

        Returns:
            ``-1`` when the position is inside the circle, otherwise the
            computed distance value.
        """
        dis = StateUtil.cal_distance2(pos, TeamBattleTrainer.BATTLE_CIRCLE)
        limit = battle_range * 1000 + 500
        return -1 if dis < limit else dis

    def search_team_battle(self, state_info):
        max_team = set()
        for hero in self.heros:
            battle_heros = self.search_team_battle_hero(state_info, hero)
            if len(battle_heros) > 1 and len(battle_heros) > len(max_team):
                max_team = battle_heros
        return max_team

    def search_team_battle_hero(self, state_info, hero):
        """Collect the heroes connected to ``hero`` through nearby enemies.

        Starting from ``hero``, every enemy hero within ``MODEL_RANGE`` is
        added to the group, then the same search is repeated from each newly
        added hero until no new hero turns up.  Only this single starting
        point is explored.

        Returns:
            A set of hero names that always contains ``hero`` itself.
        """
        visited = set()
        frontier = [hero]
        while frontier:
            current = frontier.pop()
            if current in visited:
                continue
            visited.add(current)
            nearby = StateUtil.get_nearby_enemy_heros(
                state_info, current, TeamBattleTrainer.MODEL_RANGE)
            frontier.extend(enemy.hero_name for enemy in nearby
                            if enemy.hero_name not in visited)
        return visited

    def get_model_actions_team(self,
                               state_info,
                               team,
                               battle_heroes,
                               debug=False):
        """Pick one action per hero of ``team``, best predicted Q first.

        The heroes choose one after another.  In every round each remaining
        hero is evaluated by the model, with its input extended by the
        actions already committed in this call; the hero whose best
        admissible Q-value is highest commits its action and leaves the
        pool.  This way every hero decides knowing what its predecessors
        will actually do.

        Args:
            state_info: current frame.
            team: names of the heroes that need an action.
            battle_heroes: names of all heroes taking part in the battle.
            debug: print intermediate model outputs when True.

        Returns:
            ``(action_cmds, input_list, model_upgrade)``: the chosen
            commands, the model input that produced each command (same
            order), and whether the model reported an upgrade while being
            queried (all training caches are cleared in that case).
        """
        # The circle shrinks over time, so compute its current radius.
        battle_range = self.cal_battle_range(
            len(self.state_cache) - self.battle_started)

        # Base model input and availability mask of every hero.
        hero_input_map = {}
        hero_unavail_list_map = {}
        for hero in team:
            hero_input_map[hero] = np.array(
                TeamBattleInput.gen_input(state_info, hero, battle_heroes))

            unaval_list = TeamBattleTrainer.list_unaval_actions(
                self.act_size, state_info, hero, battle_heroes, battle_range)
            hero_unavail_list_map[hero] = unaval_list
            if debug:
                unaval_list_str = ' '.join(
                    str("%.4f" % float(act)) for act in unaval_list)
                print("battle_id", self.battle_id, "tick", state_info.tick,
                      "hero", hero, "model remove_unaval_actions",
                      unaval_list_str)

        # Actions recommended by the rule-based policy for every hero.
        hero_recommend_list_map = {}
        for hero in team:
            friends, opponents = TeamBattleUtil.get_friend_opponent_heros(
                battle_heroes, hero)
            hero_info = state_info.get_hero(hero)
            hero_recommend_list_map[hero] = (
                TeamBattlePolicy.select_action_by_strategy(
                    state_info, hero_info, friends, opponents))

        action_cmds = []
        input_list = []
        left_heroes = list(team)
        model_upgrade = False
        while left_heroes:
            cur_max_q = None
            chosen_hero = None
            chosen_action_list = None
            chosen_input = None
            for hero in left_heroes:
                # Feed the actions chosen so far into this hero's input.
                hero_info = state_info.get_hero(hero)
                data_input = hero_input_map[hero]
                for prev_action in action_cmds:
                    data_input = TeamBattleInput.add_other_hero_action(
                        data_input, hero_info, prev_action, debug)

                action_list, _, _, clear_cache = \
                    self.model_util.get_action_list(
                        self.battle_id, hero, data_input)
                max_q = TeamBattleTrainer.get_max_q(
                    action_list, hero_unavail_list_map[hero],
                    hero_recommend_list_map[hero])
                if debug:
                    action_str = ' '.join(
                        str("%.4f" % float(act)) for act in action_list)
                    print("battle_id", self.battle_id, "tick", state_info.tick,
                          "本轮行为候选", "hero", hero, "max_q", max_q,
                          "model action list", action_str)

                # ">=" lets later heroes win ties and supports max_q == -1.
                # The first candidate is always accepted, so a hero is
                # chosen even when every max_q is below -1.
                if cur_max_q is None or max_q >= cur_max_q:
                    cur_max_q = max_q
                    chosen_hero = hero
                    chosen_action_list = action_list
                    chosen_input = data_input

                # A model upgrade invalidates everything cached for
                # training; the caller restarts the battle.
                if clear_cache:
                    print('battle_id', self.battle_id, '模型升级,清空训练缓存')
                    for hero_name in self.heros:
                        self.model_caches[hero_name].clear_cache()
                    model_upgrade = True

            # Commit the action of the hero with the highest Q-value.  All
            # lookups use chosen_hero, never the stale loop variable.
            unaval_list = hero_unavail_list_map[chosen_hero]
            recommend_list = hero_recommend_list_map[chosen_hero]
            friends, opponents = TeamBattleUtil.get_friend_opponent_heros(
                battle_heroes, chosen_hero)
            action_cmd, max_q, selected = TeamBattleTrainer.get_action_cmd(
                chosen_action_list, unaval_list, recommend_list, state_info,
                chosen_hero, friends, opponents)
            if debug:
                print("battle_id", self.battle_id, "tick", state_info.tick,
                      "hero", chosen_hero, "model get_action",
                      StateUtil.build_command(action_cmd), "max_q", max_q,
                      "selected", selected)

            action_cmds.append(action_cmd)
            # Store the input that belongs to the chosen hero so training
            # pairs each action with the observation that produced it.
            input_list.append(chosen_input)
            left_heroes.remove(chosen_hero)
        return action_cmds, input_list, model_upgrade

    def get_model_actions(self, state_info, heros, debug=False):
        """Pick one action per hero, visiting the heroes in random order.

        Each hero's model input is extended with the actions already chosen
        by the heroes before it, so the actions a hero sees are the ones
        that will really be executed.  In the visible part of the file this
        method is only referenced from commented-out code;
        ``get_model_actions_team`` is the Q-ordered variant in use.

        Args:
            state_info: current frame.
            heros: names of the heroes taking part in the battle.
            debug: print intermediate model outputs when True.

        Returns:
            ``(action_cmds, input_list, model_upgrade)`` with the same
            meaning as in ``get_model_actions_team``.
        """
        random_heros = list(heros)
        shuffle(random_heros)

        # The circle shrinks over time, so compute its current radius.
        battle_range = self.cal_battle_range(
            len(self.state_cache) - self.battle_started)

        action_cmds = []
        input_list = []
        model_upgrade = False
        for hero in random_heros:
            hero_info = state_info.get_hero(hero)
            # NOTE(review): gen_input is called with the battle heroes here
            # to match the call in get_model_actions_team - confirm against
            # TeamBattleInput.gen_input's signature.
            data_input = np.array(
                TeamBattleInput.gen_input(state_info, hero, heros))

            # Feed the actions chosen so far into this hero's input.
            for prev_action in action_cmds:
                data_input = TeamBattleInput.add_other_hero_action(
                    data_input, hero_info, prev_action, debug)

            action_list, _, _, clear_cache = self.model_util.get_action_list(
                self.battle_id, hero, data_input)
            if debug:
                action_str = ' '.join(
                    str("%.4f" % float(act)) for act in action_list)
                print("battle_id", self.battle_id, "tick", state_info.tick,
                      "hero", hero, "model action list", action_str)

            # list_unaval_actions expects the number of actions, not the
            # Q-values themselves.
            unaval_list = TeamBattleTrainer.list_unaval_actions(
                len(action_list), state_info, hero, heros, battle_range)
            if debug:
                unaval_list_str = ' '.join(
                    str("%.4f" % float(act)) for act in unaval_list)
                print("battle_id", self.battle_id, "tick", state_info.tick,
                      "hero", hero, "model remove_unaval_actions",
                      unaval_list_str)

            friends, opponents = TeamBattleUtil.get_friend_opponent_heros(
                heros, hero)
            # No policy recommendations are used on this path, so an empty
            # recommendation list is passed explicitly.
            action_cmd, max_q, selected = TeamBattleTrainer.get_action_cmd(
                action_list, unaval_list, [], state_info, hero, friends,
                opponents)
            if debug:
                print("battle_id", self.battle_id, "tick", state_info.tick,
                      "hero", hero, "model get_action",
                      StateUtil.build_command(action_cmd), "max_q", max_q,
                      "selected", selected)

            # A model upgrade invalidates everything cached for training;
            # the caller restarts the battle.
            if clear_cache:
                print('battle_id', self.battle_id, '模型升级,清空训练缓存')
                for hero_name in self.heros:
                    self.model_caches[hero_name].clear_cache()
                model_upgrade = True

            action_cmds.append(action_cmd)
            input_list.append(data_input)
        return action_cmds, input_list, model_upgrade

    @staticmethod
    def list_unaval_actions(act_size,
                            state_info,
                            hero_name,
                            team_battle_heros,
                            battle_range,
                            debug=False):
        """Classify every model output index as executable or not.

        Output layout: indices 0-7 are the eight move directions, 8-12 are
        normal attacks on five targets, and 13-27 are skills 1 to 3 with
        five targets each.  Skill targets default to opposing heroes; for
        supportive skills the target is switched to allies.  Skills that
        may target either side are not handled yet.

        Args:
            act_size: number of model outputs to classify.
            state_info: current frame.
            hero_name: hero the mask is built for.
            team_battle_heros: names of all heroes taking part in the battle.
            battle_range: current radius of the battle circle.
            debug: print the reason whenever an action is rejected.

        Returns:
            A list of length ``act_size`` holding, per index, ``1`` (can be
            executed), ``0`` (target is out of reach; callers move towards
            it instead) or ``-1`` (cannot be executed).  Indices not covered
            by the layout above stay ``-1``.
        """
        friends, opponents = TeamBattleUtil.get_friend_opponent_heros(
            team_battle_heros, hero_name)
        hero = state_info.get_hero(hero_name)
        avail_list = [-1] * act_size
        for selected in range(act_size):
            if selected < 8:  # move
                # movelock is deliberately not checked: attack recovery also
                # sets it to false (false meaning "cannot move").
                # Moves that would leave the battle circle are blocked.
                fwd = StateUtil.mov(selected)
                move_pos = TeamBattleUtil.play_move(hero, fwd)
                in_range = TeamBattleTrainer.in_battle_range(
                    move_pos, battle_range)
                avail_list[selected] = 1 if in_range == -1 else -1
            elif selected < 13:  # normal attack, five targets
                target_index = selected - 8
                target_hero = TeamBattleUtil.get_target_hero(
                    hero_name, friends, opponents, target_index)
                if target_hero is None:
                    avail_list[selected] = -1
                    if debug: print("找不到对应目标英雄")
                    continue
                rival_info = state_info.get_hero(target_hero)
                # Target is not visible.
                if not rival_info.is_enemy_visible():
                    avail_list[selected] = -1
                    if debug: print("英雄不可见")
                    continue
                dist = StateUtil.cal_distance(hero.pos, rival_info.pos)
                # Target is too far away for a normal attack.
                if dist > StateUtil.ATTACK_HERO_RADIUS:
                    avail_list[selected] = 0
                    if debug: print("英雄太远,放弃普攻")
                    continue
                # Ignore targets that are already dead.
                if rival_info.hp <= 0:
                    avail_list[selected] = -1
                    if debug: print("对方英雄死亡")
                    continue
                avail_list[selected] = 1
            elif selected < 28:  # skills 1-3, five targets each
                # TODO: channelled skills are not handled; not needed so far.
                skillid = int((selected - 13) / 5 + 1)
                skill = hero.skills[skillid]
                if skill.canuse != True:
                    # Silenced, crowd-controlled (stun, knock-up, freeze...)
                    # or the skill has not been learned.
                    avail_list[selected] = -1
                    if debug:
                        print("技能受限,放弃施法" + str(skillid) +
                              " hero.skills[x].canuse=" + str(skill.canuse) +
                              " tick=" + str(state_info.tick))
                    continue
                if skill.cost is not None and skill.cost > hero.mp:
                    # Not enough mp.  Special case: for hero cfg_id '103'
                    # skills 1 and 2 cost hp instead.
                    if not (hero.cfg_id == '103' and
                            (skillid == 1 or skillid == 2)):
                        avail_list[selected] = -1
                        if debug: print("mp不足,放弃施法" + str(skillid))
                        continue
                if skill.cd > 0:
                    # Skill is still on cooldown.
                    avail_list[selected] = -1
                    if debug: print("技能cd中,放弃施法" + str(skillid))
                    continue
                tgt_index = selected - 13 - (skillid - 1) * 5
                skill_info = SkillUtil.get_skill_info(hero.cfg_id, skillid)
                # TODO: the buff logic has not been tested with a matching
                # hero yet.
                is_buff = skill_info.cast_target == SkillTargetEnum.buff
                is_self = skill_info.cast_target == SkillTargetEnum.self
                tgt_hero = TeamBattleUtil.get_target_hero(
                    hero.hero_name, friends, opponents, tgt_index, is_buff,
                    is_self)

                if tgt_hero is None:
                    avail_list[selected] = -1
                    if debug: print("找不到对应目标英雄")
                    continue
                [tgtid, tgtpos] = TeamBattleTrainer.choose_skill_target(
                    tgt_index, state_info, skill_info, hero_name, hero.pos,
                    tgt_hero, debug)
                if tgtid == -1 or tgtid == 0:
                    # choose_skill_target reports -1 / 0 with the same
                    # meaning as this mask, so the value is stored as-is.
                    avail_list[selected] = tgtid
                    if debug: print("目标不符合施法要求")
                    continue
                # Finally let the rule-based policy veto the cast.
                policy_avail = TeamBattlePolicy.check_skill_condition(
                    skill_info, state_info, hero, tgt_hero, friends,
                    opponents)
                avail_list[selected] = 1 if policy_avail else -1
        return avail_list

    @staticmethod
    def choose_skill_target(selected,
                            state_info,
                            skill_info,
                            hero_name,
                            pos,
                            tgt_hero_name,
                            debug=False):
        hero_info = state_info.get_hero(hero_name)
        if selected == 0:
            # 施法目标为自己
            # 首先判断施法目标是不是只限于敌方英雄
            if skill_info.cast_target == SkillTargetEnum.self and hero_name != str(
                    tgt_hero_name):
                if debug: print("施法目标为self,但是对象不是自己")
                return [-1, None]
            tgtid = hero_name
            # TODO 这里有点问题,如果是目标是自己的技能,是不是要区分下目的,否则fwd计算会出现问题
            tgtpos = None
        if selected <= 4:
            # 攻击对方英雄
            tgt_hero = state_info.get_hero(tgt_hero_name)
            if tgt_hero.team != hero_info.team and not tgt_hero.is_enemy_visible(
            ):
                if debug: print("敌方英雄不可见")
                tgtid = -1
                tgtpos = None
            elif StateUtil.cal_distance(tgt_hero.pos,
                                        pos) > skill_info.cast_distance:
                if debug:
                    print("技能攻击不到对方 %s %s %s" %
                          (tgt_hero_name,
                           StateUtil.cal_distance(
                               tgt_hero.pos, pos), skill_info.cast_distance))
                tgtid = 0
                tgtpos = None
            # 对方英雄死亡时候忽略这个目标
            elif tgt_hero.hp <= 0:
                if debug: print("技能攻击不了对方,对方已经死亡")
                tgtid = -1
                tgtpos = None
            else:
                tgtid = tgt_hero_name
                tgtpos = tgt_hero.pos
        return tgtid, tgtpos

    @staticmethod
    def get_max_q(action_list, unaval_list, recommmend_list):
        q_list = list(action_list)

        # 如果有推荐的行为,只从中挑选
        if len(recommmend_list) > 0:
            for i in range(len(action_list)):
                if i not in recommmend_list:
                    q_list[i] = -1

        while True:
            max_q = max(q_list)

            if max_q <= -1:
                return max_q

            selected = q_list.index(max_q)
            avail_type = unaval_list[selected]
            if avail_type == -1:
                # TODO avail_type == 0: 是否考虑技能不可用时候不接近对方
                # 不可用行为
                q_list[selected] = -1
                continue
            return max_q

    @staticmethod
    def get_action_cmd(action_list,
                       unaval_list,
                       recommmend_list,
                       state_info,
                       hero_name,
                       friends,
                       opponents,
                       revert=False):
        """Turn the best admissible model output into a game command.

        Candidates are tried from the highest Q-value downwards; entries
        whose availability in ``unaval_list`` is ``-1`` are skipped.  An
        availability of ``0`` turns an attack or a cast into a MOVE towards
        the target.  ``action_list`` is not modified.

        Args:
            action_list: Q-value per output index.
            unaval_list: availability per output index (see
                ``list_unaval_actions``).
            recommmend_list: when non-empty, only these indices are
                considered.
            state_info: current frame.
            hero_name: the acting hero.
            friends: allied heroes, used to resolve target slots.
            opponents: opposing heroes, used to resolve target slots.
            revert: forwarded to ``StateUtil.mov`` for move actions.

        Returns:
            ``(action, max_q, selected)``.  When no candidate is left, a
            HOLD command is returned with ``selected == -1``.
        """
        hero = state_info.get_hero(hero_name)
        # Work on a copy so that masking never leaks into the caller's list.
        q_list = list(action_list)

        # When recommendations exist, choose among them only.
        if len(recommmend_list) > 0:
            for i in range(len(q_list)):
                if i not in recommmend_list:
                    q_list[i] = -1
            print("battle_id", state_info.battleid, "tick", state_info.tick,
                  "hero", hero_name, "根据推荐,只从以下行为中挑选",
                  ",".join(str("%f" % float(act)) for act in q_list),
                  ",".join(str("%f" % float(act)) for act in recommmend_list))

        while True:
            max_q = max(q_list)
            if max_q <= -1:
                action = CmdAction(hero_name, CmdActionEnum.HOLD, None, None,
                                   hero.pos, None, None, 48, None)
                return action, max_q, -1

            selected = q_list.index(max_q)
            avail_type = unaval_list[selected]
            if avail_type == -1:
                # TODO: for avail_type == 0, consider not approaching the
                # target when the skill cannot be used.
                q_list[selected] = -1
                continue

            if selected < 8:  # move
                fwd = StateUtil.mov(selected, revert)
                # The destination comes from our own movement formula.  It
                # may be blocked by obstacles, and the real reachable
                # distance may be longer than computed.
                tgtpos = TeamBattleUtil.set_move_target(hero, fwd)
                action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None,
                                   None, tgtpos, None, None, selected, None)
                return action, max_q, selected

            if selected < 13:  # normal attack on a hero
                target_index = selected - 8
                target_hero = TeamBattleUtil.get_target_hero(
                    hero.hero_name, friends, opponents, target_index)
                if avail_type == 0:
                    # Out of reach: walk towards the target instead.
                    target_hero_info = state_info.get_hero(target_hero)
                    action = CmdAction(hero.hero_name, CmdActionEnum.MOVE,
                                       None, None, target_hero_info.pos, None,
                                       None, selected, None)
                else:
                    action = CmdAction(hero.hero_name, CmdActionEnum.ATTACK, 0,
                                       target_hero, None, None, None, selected,
                                       None)
                return action, max_q, selected

            if selected < 28:  # skill
                skillid = int((selected - 13) / 5 + 1)
                tgt_index = selected - 13 - (skillid - 1) * 5
                skill_info = SkillUtil.get_skill_info(hero.cfg_id, skillid)
                is_buff = skill_info.cast_target == SkillTargetEnum.buff
                is_self = skill_info.cast_target == SkillTargetEnum.self
                tgt_hero = TeamBattleUtil.get_target_hero(
                    hero.hero_name, friends, opponents, tgt_index, is_buff,
                    is_self)
                tgt_pos = state_info.get_hero(tgt_hero).pos
                if avail_type == 0:
                    # Out of reach: walk towards the target instead.
                    action = CmdAction(hero.hero_name, CmdActionEnum.MOVE,
                                       None, None, tgt_pos, None, None,
                                       selected, None)
                else:
                    fwd = tgt_pos.fwd(hero.pos)
                    action = CmdAction(hero.hero_name, CmdActionEnum.CAST,
                                       skillid, tgt_hero, tgt_pos, fwd, None,
                                       selected, None)
                return action, max_q, selected

            # Index outside the known output layout: no command can be built
            # for it, so mask it and try the next candidate rather than
            # looping forever on the same maximum.
            q_list[selected] = -1

    def buy_equip(self, state_info, hero_name):
        """Return the command string for an equipment purchase, if any.

        ``EquipUtil.buy_equip`` decides whether the hero should buy
        something; ``None`` is returned when it proposes nothing.
        """
        buy_action = EquipUtil.buy_equip(state_info, hero_name)
        if buy_action is None:
            return None
        return StateUtil.build_command(buy_action)

    def upgrade_skills(self, state_info, hero_name):
        """Return the command string that upgrades one skill, if possible.

        Skill 3 is preferred whenever it can be upgraded; otherwise the
        first upgradable skill is taken.  ``None`` is returned when no skill
        can be upgraded.
        """
        hero = state_info.get_hero(hero_name)
        upgradable = StateUtil.get_skills_can_upgrade(hero)
        if len(upgradable) <= 0:
            return None

        if 3 in upgradable:
            skillid = 3
        else:
            skillid = upgradable[0]
        update_cmd = CmdAction(hero.hero_name, CmdActionEnum.UPDATE, skillid,
                               None, None, None, None, None, None)
        return StateUtil.build_command(update_cmd)
Example #17
0
    def build_response(self, raw_state_str):
        self.save_raw_log(raw_state_str)
        prev_state_info = self.state_cache[-1] if len(
            self.state_cache) > 0 else None
        response_strs = []

        # 解析客户端发送的请求
        obj = JSON.loads(raw_state_str)
        raw_state_info = StateInfo.decode(obj)

        # 重开时候会有以下报文  {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}
        if raw_state_info.tick == -1:
            return {"ID": raw_state_info.battleid, "tick": -1}

        if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (
                prev_state_info is None
                or prev_state_info.tick > raw_state_info.tick):
            print("clear")
            prev_state_info = None
            self.state_cache = []
            self.battle_started = -1
            self.battle_heroes_cache = []
            self.dead_heroes = []
            self.dead_heroes_cache = []
            self.data_inputs = []
            self.rebooting = False
        elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
            # 不是开始帧的话直接返回重启游戏
            # 还有偶然情况下首帧没有tick(即-1)的情况,这种情况下只能重启本场战斗
            print("battle_id", self.battle_id, "tick", raw_state_info.tick,
                  '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
            action_strs = [
                StateUtil.build_action_command('27', 'RESTART', None)
            ]
            rsp_obj = {
                "ID": raw_state_info.battleid,
                "tick": raw_state_info.tick,
                "cmd": action_strs
            }
            rsp_str = JSON.dumps(rsp_obj)
            return rsp_str

        state_info = StateUtil.update_state_log(prev_state_info,
                                                raw_state_info)
        hero = state_info.get_hero("27")

        if hero is None or hero.hp is None:
            # 偶然情况处理,如果找不到英雄,直接重开
            print("battle_id", self.battle_id, "tick", state_info.tick,
                  '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
            action_strs = [
                StateUtil.build_action_command('27', 'RESTART', None)
            ]
            rsp_obj = {
                "ID": raw_state_info.battleid,
                "tick": raw_state_info.tick,
                "cmd": action_strs
            }
            rsp_str = JSON.dumps(rsp_obj)
            return rsp_str

        # 战斗前准备工作
        if len(self.state_cache) == 0:
            # 第一帧的时候,添加金钱和等级
            for hero in self.heros:
                add_gold_cmd = CmdAction(hero, CmdActionEnum.ADDGOLD, None,
                                         None, None, None, None, None, None)
                add_gold_cmd.gold = 3000
                add_gold_str = StateUtil.build_command(add_gold_cmd)
                response_strs.append(add_gold_str)

                add_lv_cmd = CmdAction(hero, CmdActionEnum.ADDLV, None, None,
                                       None, None, None, None, None)
                add_lv_cmd.lv = 9
                add_lv_str = StateUtil.build_command(add_lv_cmd)
                response_strs.append(add_lv_str)
        elif len(self.state_cache) > 1:
            # 第二帧时候开始,升级技能,购买装备,这个操作可能会持续好几帧
            for hero in self.heros:
                upgrade_cmd = self.upgrade_skills(state_info, hero)
                if upgrade_cmd is not None:
                    response_strs.append(upgrade_cmd)

                buy_cmd = self.buy_equip(state_info, hero)
                if buy_cmd is not None:
                    response_strs.append(buy_cmd)

        for hero in self.heros:
            # 判断是否英雄死亡
            if prev_state_info is not None:
                dead = StateUtil.if_hero_dead(prev_state_info, state_info,
                                              hero)
                if dead == 1 and hero not in self.dead_heroes:
                    print("battle_id", self.battle_id, "tick", state_info.tick,
                          "英雄死亡", hero, "tick", state_info.tick)
                    self.dead_heroes.append(hero)

        # 首先要求所有英雄站到团战圈内,然后开始模型计算,这时候所有的行动都有模型来决定
        # 需要过滤掉无效的行动,同时屏蔽会离开战斗圈的移动
        #TODO 开始团战后,如果有偶尔的技能移动会离开圈,则拉回来

        # 这里会排除掉死亡的英雄,他们不需要再加入团战
        # 团战范围在收缩
        battle_range = self.cal_battle_range(
            len(self.state_cache) - self.battle_started)
        heroes_in_range, heroes_out_range = TeamBattleTrainer.all_in_battle_range(
            state_info, self.heros, self.dead_heroes, battle_range)

        # 存活英雄
        battle_heros = list(heroes_in_range)
        battle_heros.extend(heroes_out_range)

        # 缓存参战情况和死亡情况,用于后续训练
        self.battle_heroes_cache.append(battle_heros)
        self.dead_heroes_cache.append(list(self.dead_heroes))

        if state_info.tick >= 142560:
            debuginfo = True

        # 团战还没有开始,有英雄还在圈外
        if len(heroes_out_range) > 0:
            if self.battle_started > -1:
                print('battle_id', self.battle_id, "战斗已经开始,但是为什么还有英雄在团战圈外",
                      ','.join(heroes_out_range), "battle_range", battle_range)

            # 移动到两个开始战斗地点附近
            # 如果是团战开始之后,移动到团战中心点
            for hero in heroes_out_range:
                start_point_x = randint(0, 8000)
                start_point_z = TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START * 1000 if self.battle_started == -1 else 0
                start_point_z += randint(-4000, 4000)
                if TeamBattleUtil.get_hero_team(hero) == 0:
                    start_point_z *= -1
                start_point_z += TeamBattleTrainer.BATTLE_POINT_Z
                tgt_pos = PosStateInfo(start_point_x, 0, start_point_z)
                move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None,
                                        tgt_pos, None, None, None, None)
                mov_cmd_str = StateUtil.build_command(move_action)
                response_strs.append(mov_cmd_str)
        # 团战已经开始
        elif not self.rebooting:
            if self.battle_started == -1:
                self.battle_started = len(self.state_cache)

            # 对特殊情况。比如德古拉使用大招hp会变1,修改帧状态
            state_info, _ = TeamBattlePolicy.modify_status_4_draculas_invincible(
                state_info, self.state_cache)

            # action_cmds, input_list, model_upgrade = self.get_model_actions(state_info, heroes_in_range)
            # 跟队伍,每个队伍得到行为
            team_a, team_b = TeamBattleUtil.get_teams(heroes_in_range)
            team_actions_a, input_list_a, model_upgrade_a = self.get_model_actions_team(
                state_info, team_a, heroes_in_range)
            team_actions_b, input_list_b, model_upgrade_b = self.get_model_actions_team(
                state_info, team_b, heroes_in_range)

            # 如果模型已经开战,重启战斗
            if (model_upgrade_a or model_upgrade_b
                ) and self.battle_started < len(self.state_cache) + 1:
                print("battle_id", self.battle_id, "因为模型升级,重启战斗",
                      self.battle_started, len(self.state_cache))
                action_strs = [
                    StateUtil.build_action_command('27', 'RESTART', None)
                ]
                rsp_obj = {
                    "ID": raw_state_info.battleid,
                    "tick": raw_state_info.tick,
                    "cmd": action_strs
                }
                rsp_str = JSON.dumps(rsp_obj)
                return rsp_str
            data_input_map = {}
            for action_cmd, data_input in zip(team_actions_a + team_actions_b,
                                              input_list_a + input_list_b):
                action_str = StateUtil.build_command(action_cmd)
                response_strs.append(action_str)
                state_info.add_action(action_cmd)
                data_input_map[action_cmd.hero_name] = data_input

            # 缓存所有的模型输入,用于后续训练
            self.data_inputs.append(data_input_map)

        # 添加记录到缓存中
        self.state_cache.append(state_info)

        # 将模型行为加入训练缓存,同时计算奖励值
        # 注意:因为奖励值需要看后续状态,所以这个计算会有延迟
        last_x_index = 2
        if self.battle_started > -1 and len(self.data_inputs) >= last_x_index:
            if self.rebooting:
                # 测试发现重启指令发出之后,可能下一帧还没开始重启战斗,这种情况下抛弃训练
                print("battle_id", self.battle_id, "tick", state_info.tick,
                      "warn", "要求重启战斗,但是还在收到后续帧状态, 继续重启")

                # 重启游戏
                response_strs = [
                    StateUtil.build_action_command('27', 'RESTART', None)
                ]
            else:
                state_index = len(self.state_cache) - last_x_index
                win, win_team, left_heroes = self.remember_replay_heroes(
                    -last_x_index, state_index, battle_range)

                # 团战结束条件
                # 首先战至最后一人
                # all_in_team = TeamBattleUtil.all_in_one_team(heroes_in_range)
                # if self.battle_started:
                #     if len(self.dead_heroes) >= 9 or (len(self.dead_heroes) >= 5 and all_in_team > -1):
                if win == 1:
                    # 重启游戏
                    print('battle_id', self.battle_id, "重启游戏", "剩余人员",
                          ','.join(left_heroes))
                    response_strs = [
                        StateUtil.build_action_command('27', 'RESTART', None)
                    ]
                    self.rebooting = True
        # battle_heros = self.search_team_battle(state_info)
        # if len(battle_heros) > 0:
        #     print("team battle heros", ';'.join(battle_heros))
        #
        # heros_need_model = []
        # for hero in self.heros:
        #     # 判断是否英雄死亡
        #     if prev_state_info is not None:
        #         dead = StateUtil.if_hero_dead(prev_state_info, state_info, hero)
        #         if dead == 1 and hero not in self.dead_heroes:
        #             self.dead_heroes.append(hero)
        #
        #     # 复活的英雄不要再去参团
        #     if hero in self.dead_heroes:
        #         continue
        #
        #     # near_enemy_heroes = StateUtil.get_nearby_enemy_heros(state_info, hero, TeamBattleTrainer.MODEL_RANGE)
        #     if hero not in battle_heros:
        #         # 移动到团战点附近,添加部分随机
        #         rdm_delta_x = randint(0, 1000)
        #         rdm_delta_z = randint(0, 1000)
        #         tgt_pos = PosStateInfo(TeamBattleTrainer.BATTLE_POINT_X + rdm_delta_x, 0, TeamBattleTrainer.BATTLE_POINT_Z + rdm_delta_z)
        #         move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None, tgt_pos, None, None, None, None)
        #         mov_cmd_str = StateUtil.build_command(move_action)
        #         response_strs.append(mov_cmd_str)
        #     else:
        #         # 启动模型决策
        #         heros_need_model.append(hero)
        #
        # if len(heros_need_model) > 0:
        #     action_cmds = self.get_model_actions(state_info, heros_need_model)
        #     for action_cmd in action_cmds:
        #         action_str = StateUtil.build_command(action_cmd)
        #         response_strs.append(action_str)
        #         state_info.add_action(action_cmd)

        #TODO 记录模型输出,用于后续训练

        # 返回结果给游戏端
        rsp_obj = {
            "ID": state_info.battleid,
            "tick": state_info.tick,
            "cmd": response_strs
        }
        rsp_str = JSON.dumps(rsp_obj)
        print('battle_id', self.battle_id, 'response', rsp_str)
        return rsp_str
Example #18
0
class StateUtil:
    # NOTE: the game does not strictly report a state every 528 ticks; that
    # only holds on PC, and the interval may also shrink in between.
    TICK_PER_STATE = 528
    # The radii below are compared against StateUtil.cal_distance results,
    # i.e. raw coordinate distances divided by 1000.
    NEARBY_BASEMENT_RADIUS = 7
    ATTACK_HERO_RADIUS = 7  # 13.5
    ATTACK_UNIT_RADIUS = 7  # 10
    TOWER_ATTACK_RADIUS = 8

    # Must be kept equal to ATTACK_HERO_RADIUS.
    LINE_MODEL_RADIUS = 7
    GOLD_GAIN_RADIUS = 11
    MAX_RADIUS = 50

    # Base positions returned for team 0 and team 1 respectively.
    BASEMENT_TEAM_0 = PosStateInfo(-75680, -80, 0)
    BASEMENT_TEAM_1 = PosStateInfo(75140, -80, 0)

    # Range per skill id (string key). The key "10120" used to be spelled
    # "10l120" (with a letter "l"), so it could never match a numeric id.
    # NOTE(review): "10101" breaks the "...0" pattern of the other keys —
    # confirm whether "10100" was intended.
    ATTACK_SKILL_RANGES = {
        "10101": 2000,
        "10110": 8000,
        "10120": 6000,
        "10130": 3500,
        "10200": 2000,
        "10210": 8000,
        "10220": 5000,
        "10230": 6000,
    }

    # Way points of the three lanes, indexed by lane and then by point order.
    # Each entry below is an (x, z) pair; y is always 0.
    LINE_WAY_POINTS = [
        [PosStateInfo(x, 0, z) for x, z in lane]
        for lane in (
            # lane 0
            (
                (56800, -2800),
                (54000, -5100),
                (53000, -20000),
                (37500, -29500),
                (31800, -38700),
                (14000, -54500),
                (-600, -61000),
                (-22100, -47000),
                (-33400, -37500),
                (-41000, -27000),
                (-51000, -13000),
                (-56300, 800),
            ),
            # lane 1
            (
                (58100, -100),
                (45100, -2000),
                (28800, 500),
                (16900, 1000),
                (0, -1000),
                (-11500, 300),
                (-17400, -200),
                (-44900, 2600),
                (-56500, 1700),
            ),
            # lane 2
            (
                (56900, 2600),
                (54000, 5000),
                (54200, 18400),
                (43300, 25700),
                (36500, 34000),
                (26000, 45200),
                (0, 60800),
                (-20600, 51700),
                (-39900, 30000),
                (-52900, 14800),
                (-56800, 2600),
            ),
        )
    ]

    @staticmethod
    def if_hero_dead(prev_state, cur_state, hero_name):
        prev_hero = prev_state.get_hero(hero_name)
        cur_hero = cur_state.get_hero(hero_name)
        dead = 1 if prev_hero.hp > 0 and cur_hero.hp <= 0 else 0
        if dead:
            breakpoint = 1
        return dead

    @staticmethod
    def get_attack_cast_dmg(cur_state, next_state, next_next_state, hero_name,
                            rival_hero):
        """Sum the damage the hero's current action dealt to ``rival_hero``.

        Only CAST and ATTACK actions are scored; any other action yields 0.
        Damage is read through ``get_hero_dmg_skill`` using the skill id of
        the action stored in ``cur_state``.

        Args:
            cur_state: state holding the action being scored.
            next_state: the following state.
            next_next_state: the state after ``next_state``.
            hero_name: the acting hero.
            rival_hero: the hero whose received damage is read.

        Returns:
            The accumulated damage value.
        """
        cur_act = cur_state.get_hero_action(hero_name)
        skill_slot = cur_act.skillid
        chosen = cur_act.action

        if chosen == CmdActionEnum.CAST:
            # For a skill, damage from both following frames is counted.
            count_second_frame = True
        elif chosen == CmdActionEnum.ATTACK:
            # Basic-attack damage is delayed too. The second frame is only
            # counted when the hero does not attack again in the next frame;
            # otherwise only one frame is used, for simplicity.
            next_act = next_state.get_hero_action(hero_name)
            count_second_frame = (next_act is None
                                  or next_act.action != CmdActionEnum.ATTACK)
        else:
            return 0

        dmg = next_state.get_hero_dmg_skill(hero_name, skill_slot, rival_hero)
        if count_second_frame:
            dmg += next_next_state.get_hero_dmg_skill(hero_name, skill_slot,
                                                      rival_hero)
        return dmg

    @staticmethod
    # 固定检测中路第一个塔是否被摧毁了
    def if_first_tower_destroyed_in_middle_line(state_info):
        for unit in state_info.units:
            if unit.pos.x == 17110 or unit.pos.x == -17110:
                if unit.hp <= 0:
                    print(unit.unit_name + '塔被摧毁, win:' + str(unit.team) +
                          " detail:" + unit.pos.to_string())
                    return unit.team
        return None

    @staticmethod
    def get_tower_hp_change(state_info,
                            next_info,
                            hero_name,
                            line_idx,
                            self_tower=True):
        """Measure how much hp nearby lane towers lost between two states.

        Towers are taken from ``get_near_towers_in_line`` around the hero
        (radius ``LINE_MODEL_RADIUS``) and filtered to the hero's own team
        when ``self_tower`` is truthy, otherwise to the other team.

        Returns:
            ``(hp_change, destroyed)``: the summed hp loss of each tower as
            a fraction of its ``maxhp``, and whether any tower is missing or
            at hp <= 0 in ``next_info``.
        """
        hero_state = state_info.get_hero(hero_name)
        candidates = StateUtil.get_near_towers_in_line(
            state_info, hero_state, line_idx, StateUtil.LINE_MODEL_RADIUS)

        if self_tower:
            wanted = lambda t: t.team == hero_state.team
        else:
            wanted = lambda t: t.team != hero_state.team
        towers = list(filter(wanted, candidates))

        total_loss = 0
        any_destroyed = False
        for tower in towers:
            after = next_info.get_unit(tower.unit_name)
            if after is None or after.hp <= 0:
                # A vanished or dead tower lost all the hp it still had.
                total_loss += float(tower.hp) / tower.maxhp
                any_destroyed = True
            else:
                total_loss += float(tower.hp - after.hp) / tower.maxhp
        return total_loss, any_destroyed

    @staticmethod
    def get_skills_can_upgrade(hero_info):
        skills = []
        for i in range(1, 4):
            skill_info = hero_info.skills[i]
            if skill_info.up:
                skills.append(i)
        return skills

    @staticmethod
    def get_basement(hero_info):
        return StateUtil.BASEMENT_TEAM_1 if hero_info.team == 1 else StateUtil.BASEMENT_TEAM_0

    @staticmethod
    def if_hero_at_basement(hero_info):
        """Tell whether the hero stands near its own team's base.

        Returns:
            True when the ``cal_distance`` between the hero and its base is
            below ``NEARBY_BASEMENT_RADIUS``, otherwise False.
        """
        if hero_info.team == 1:
            home = StateUtil.BASEMENT_TEAM_1
        else:
            home = StateUtil.BASEMENT_TEAM_0
        return StateUtil.cal_distance(
            hero_info.pos, home) < StateUtil.NEARBY_BASEMENT_RADIUS

    @staticmethod
    def if_unit_monster(unit_info):
        # TODO 需要两个boss的id
        if int(unit_info.cfg_id) == 612 or int(
                unit_info.cfg_id) == 6410 or int(unit_info.cfg_id) == 611:
            return True
        return False

    @staticmethod
    def if_unit_tower(unit_name):
        if 26 >= int(unit_name) > 0:
            return True
        return False

    @staticmethod
    def if_unit_hero(unit_name):
        if 27 <= int(unit_name) <= 28:
            return True
        return False

    @staticmethod
    def if_unit_soldier(unit_cfgid):
        if int(unit_cfgid) == 911 or int(unit_cfgid) == 912 or int(
                unit_cfgid) == 913 or int(unit_cfgid) == 914:
            return True
        return False

    @staticmethod
    def get_heros_in_team(state_info, team_id):
        return [hero for hero in state_info.heros if hero.team == team_id]

    @staticmethod
    def get_units_in_team(state_info, team_id):
        return [
            unit for unit in state_info.units
            if unit.team == team_id and unit.state == 'in' and unit.hp > 0
        ]

    @staticmethod
    def get_dead_units_in_line(state_info,
                               team_id,
                               line_index,
                               hero_info=None,
                               search_range=MAX_RADIUS):
        """Collect a team's dead units that lie on the given lane.

        Args:
            state_info: state whose ``units`` are scanned.
            team_id: team the dead units must belong to.
            line_index: lane index passed to ``if_in_line``.
            hero_info: optional hero; when given, only units within
                ``search_range`` (``cal_distance``) of the hero are kept.
            search_range: maximum distance to ``hero_info``.

        Returns:
            List of matching units, in ``state_info.units`` order.
        """
        result = []
        for unit in state_info.units:
            # A dying soldier's last record has maxhp=0, hp=1, state=out; the
            # second-to-last has a normal maxhp, hp=0 and state=in. hp <= 0
            # is therefore used as the death signal.
            if not (unit.hp <= 0 and unit.team == team_id):
                continue
            # if_in_line returns a 0-based segment index and -1 when the unit
            # is off the lane, so segment 0 must be accepted as well (the
            # previous "> 0" test silently dropped units in the first
            # segment).
            if StateUtil.if_in_line(unit, line_index) < 0:
                continue
            if hero_info is None or StateUtil.cal_distance(
                    unit.pos, hero_info.pos) <= search_range:
                result.append(unit)
        return result

    @staticmethod
    def if_unit_long_range_attack(unit_cfgid):
        if int(unit_cfgid) == 911:
            return False
        return True

    @staticmethod
    # TODO 核对信息
    def get_unit_value(unit_name, unit_cfgid):
        if int(unit_cfgid) == 911:
            return 25
        if int(unit_cfgid) == 912:
            return 20
        if int(unit_cfgid) == 913:
            return 50
        if int(unit_cfgid) == 914:
            return 50
        if int(unit_name) < 27:
            return 200
        else:
            print("unknow value unit %s cfg %s" % (unit_name, unit_cfgid))
        return -1

    @staticmethod
    def get_frontest_soldier_in_line(state_info, line_index, team_id):
        """Pick the team's front-most soldier on a lane.

        Active units of the team that are neither monsters nor towers are
        considered. For team 0 the unit with the largest x is kept, for
        team 1 the one with the smallest x; on ties, or for any other team
        id, the later unit wins.

        Returns:
            The selected unit, or None when no soldier is on the lane.
        """
        candidates = [
            u for u in StateUtil.get_units_in_team(state_info, team_id)
            if not (StateUtil.if_unit_monster(u)
                    or StateUtil.if_unit_tower(u.unit_name))
        ]

        frontest = None
        for soldier in candidates:
            if StateUtil.if_in_line(soldier, line_index) < 0:
                continue
            if frontest is None:
                frontest = soldier
            elif team_id == 0 and frontest.pos.x > soldier.pos.x:
                pass  # current pick is still further forward for team 0
            elif team_id == 1 and frontest.pos.x < soldier.pos.x:
                pass  # current pick is still further forward for team 1
            else:
                frontest = soldier
        return frontest

    @staticmethod
    def get_units_in_line(units, line_index):
        units_in_line = []
        for unit in units:
            if unit is None:
                continue
            line_pos = StateUtil.if_in_line(unit, line_index)
            if line_pos >= 0:
                units_in_line.append(unit)
        return units_in_line

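    # Get the soldier-line positions and the number of soldiers.
    # Lanes are numbered 0-2 from left to right.
    # The logic is overly complex: a lane cell can be so long that the computed midpoint ends up far from both real soldier waves.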
    @staticmethod
    def get_solider_lines(state_info, line_index, team_id):
        """Group a team's soldiers on one lane by lane segment.

        Active units of the team that are neither monsters nor towers are
        bucketed by the segment index ``if_in_line`` reports. For every
        segment holding at least one soldier, a ``SoldierLine`` is created
        at the centre of the soldiers in that segment
        (``cal_soldier_wave_point``). A summary line is printed to stdout.

        Returns:
            List of ``SoldierLine`` objects in ascending segment order,
            reversed when ``team_id == 1``.
        """
        units = StateUtil.get_units_in_team(state_info, team_id)
        soldiers = [
            u for u in units if not StateUtil.if_unit_monster(u)
            and not StateUtil.if_unit_tower(u.unit_name)
        ]
        line_pos_map = {}
        soldiers_in_line = 0
        for soldier in soldiers:
            line_pos = StateUtil.if_in_line(soldier, line_index)
            if line_pos >= 0:
                soldiers_in_line += 1
                line_pos_map.setdefault(line_pos,
                                        []).append(soldier.unit_name)

        print('front_point team:%s, line:%s, %s/%s in line' %
              (team_id, line_index, soldiers_in_line, len(soldiers)))

        # Walk every lane segment and emit one gathering point per occupied
        # segment: the centre of all soldiers inside that segment.
        soldier_lines = []
        # NOTE(review): this list is never filled, so every SoldierLine
        # receives the same empty list — confirm whether the segment's unit
        # names were meant to be passed instead.
        cache_units = []
        for line_pos_idx in range(len(StateUtil.LINE_WAY_POINTS[line_index])):
            # Only occupied segments have an entry in the map. The previous
            # "not in" test indexed the map with a missing key and raised
            # KeyError on the first empty segment.
            if line_pos_idx not in line_pos_map:
                continue
            pos = StateUtil.cal_soldier_wave_point(
                state_info, line_pos_map[line_pos_idx])
            sl = SoldierLine(team_id, line_index, pos, cache_units)
            soldier_lines.append(sl)

        # Order the result from lane start to lane end.
        # NOTE(review): the original comment said team 0's order must be
        # flipped, while the code flips for team 1 — confirm which is
        # intended.
        if team_id == 1 and len(soldier_lines) > 0:
            soldier_lines.reverse()

        return soldier_lines

    @staticmethod
    def cal_soldier_wave_point(state_info, unit_index_list):
        """Return the centre position of the named units.

        x and z are the integer-truncated means of the units' coordinates;
        y is fixed at -80.
        """
        members = [state_info.get_unit(name) for name in unit_index_list]
        count = len(unit_index_list)
        mean_x = sum(unit.pos.x for unit in members) / count
        mean_z = sum(unit.pos.z for unit in members) / count
        return PosStateInfo(int(mean_x), -80, int(mean_z))

    # Return the unit's position (segment index) along the lane.
    # Results start from 0.
    @staticmethod
    def if_in_line(unit_info, line_index, range=3000):
        """Locate a unit on a lane.

        Each pair of consecutive way points spans a bounding box: the x
        extent is exactly the span between the two points, the z extent is
        widened by ``range`` on both sides.

        Returns:
            The 0-based index of the first segment whose box contains the
            unit's (x, z) position, or -1 when none does.
        """
        line = StateUtil.LINE_WAY_POINTS[line_index]
        for idx, (start, end) in enumerate(zip(line, line[1:])):
            pos = unit_info.pos
            inside_x = min(start.x, end.x) <= pos.x <= max(start.x, end.x)
            inside_z = (min(start.z, end.z) - range <= pos.z <=
                        max(start.z, end.z) + range)
            if inside_x and inside_z:
                return idx
        return -1

    @staticmethod
    def parse_state_log(json_str):
        """Decode one state log line into a ``StateInfo``.

        The first 23 characters of the line are discarded before the rest
        is parsed as JSON.
        """
        # TODO: maybe because of python3, the time before the "{" should be
        # cut off
        payload = json_str[23:]
        return StateInfo.decode(JSON.loads(payload))

    @staticmethod
    def update_state_log(prev_state, cur_state):
        if prev_state is None:
            return cur_state
        # 因为每一次传输时候并不是全量信息,所以需要好上一帧的完整信息进行合并
        # 合并小兵信息
        # 合并野怪信息
        # 合并塔信息
        # 合并英雄信息
        new_state = prev_state.merge(cur_state)
        return new_state

    @staticmethod
    def get_nearby_enemy_heros(state_info,
                               hero_id,
                               max_distance=ATTACK_HERO_RADIUS):
        hero = state_info.get_hero(hero_id)
        enemy_hero_team = 1 - hero.team
        enemy_heros = StateUtil.get_heros_in_team(state_info, enemy_hero_team)

        nearby_enemies = []
        for enemy in enemy_heros:
            # 首先需要确定敌方英雄可见
            if enemy.is_enemy_visible() and enemy.hp > 0:
                distance = StateUtil.cal_distance(hero.pos, enemy.pos)
                if distance < max_distance:
                    nearby_enemies.append(enemy)
        nearby_enemies.sort(key=lambda h: int(h.hero_name), reverse=True)
        return nearby_enemies

    @staticmethod
    def get_nearby_friend_units(state_info,
                                hero_id,
                                max_distance=ATTACK_HERO_RADIUS):
        hero = state_info.get_hero(hero_id)
        friend_unit_team = hero.team
        friend_units = StateUtil.get_units_in_team(state_info,
                                                   friend_unit_team)

        nearby_friend_units = []
        for unit in friend_units:
            # 排除掉塔
            # 排除掉野怪
            if int(unit.unit_name) > 26 and not StateUtil.if_unit_monster(
                    unit) and unit.hp > 0:
                distance = StateUtil.cal_distance(hero.pos, unit.pos)
                if distance < max_distance:
                    nearby_friend_units.append(unit)
        nearby_friend_units.sort(key=lambda u: int(u.unit_name), reverse=True)
        return nearby_friend_units

    @staticmethod
    def get_nearby_enemy_units(state_info,
                               hero_id,
                               max_distance=ATTACK_HERO_RADIUS):
        hero = state_info.get_hero(hero_id)
        enemy_unit_team = 1 - hero.team
        enemy_units = StateUtil.get_units_in_team(state_info, enemy_unit_team)

        nearby_enemy_units = []
        for unit in enemy_units:
            # 排除掉塔
            # 排除掉野怪
            if int(unit.unit_name) > 26 and not StateUtil.if_unit_monster(
                    unit) and unit.hp > 0 and unit.state == 'in':
                distance = StateUtil.cal_distance(hero.pos, unit.pos)
                if distance < max_distance:
                    nearby_enemy_units.append(unit)
        nearby_enemy_units.sort(key=lambda u: int(u.unit_name), reverse=True)
        return nearby_enemy_units

    @staticmethod
    def get_nearest_enemy_tower(state_info,
                                hero_id,
                                max_distance=ATTACK_HERO_RADIUS):
        hero = state_info.get_hero(hero_id)
        enemy_unit_team = 1 - hero.team
        enemy_units = StateUtil.get_units_in_team(state_info, enemy_unit_team)
        nearest_enemy_tower = None
        for unit in enemy_units:
            # 排除小兵
            # 排除掉野怪
            if int(unit.unit_name) < 27 and not StateUtil.if_unit_monster(
                    unit) and unit.hp > 0:
                distance = StateUtil.cal_distance(hero.pos, unit.pos)
                if distance < max_distance:
                    nearest_enemy_tower = unit
                    max_distance = distance
        return nearest_enemy_tower

    @staticmethod
    def get_first_tower(state_info, hero):
        for unit in state_info.units:
            if unit.team == hero.team and (unit.pos.x == 17110
                                           or unit.pos.x == -17110):
                return unit
        return None

    @staticmethod
    def get_hp_restore_place(state_info, hero):
        for unit in state_info.units:
            if unit.team == hero.team and (unit.pos.x == 17110
                                           or unit.pos.x == -17110):
                # 移动到塔后侧
                near_tower_x = unit.pos.x - 3000 if hero.team == 0 else unit.pos.x + 3000
                pos = PosStateInfo(near_tower_x, unit.pos.y, unit.pos.z)
                return pos
        return None

    @staticmethod
    def get_tower_behind(tower_info, hero, line_index):
        """Return the position 4000 behind the given tower.

        Per the original author's note, this differs from the function that
        follows it in that it targets the hp-restore spot. "Behind" means a
        smaller x for team 0 and a larger x otherwise. ``line_index`` is
        not used.
        """
        shift = -4000 if hero.team == 0 else 4000
        return PosStateInfo(tower_info.pos.x + shift, tower_info.pos.y,
                            tower_info.pos.z)

    @staticmethod
    # 这里是到达一个撤退点,注意不要去吃加血符文
    def get_retreat_pos(state_info, hero, line_index):
        towers = []
        for unit in state_info.units:
            if StateUtil.if_unit_tower(
                    unit.unit_name) and unit.team == hero.team:
                if StateUtil.if_in_line(unit, line_index) >= 0:
                    # 在英雄后面的塔
                    if hero.team == 0 and hero.pos.x > unit.pos.x:
                        towers.append(unit)
                    elif hero.team == 1 and hero.pos.x < unit.pos.x:
                        towers.append(unit)
        if len(towers) > 0:
            towers.sort(key=lambda t: math.fabs(hero.pos.x - t.pos.x),
                        reverse=False)
            near_tower = towers[0]
            # 移动到塔后侧
            near_tower_x = near_tower.pos.x - 3000 if hero.team == 0 else near_tower.pos.x + 3000
            near_tower_z = near_tower.pos.z - 2000 if hero.team == 0 else near_tower.pos.z + 2000
            pos = PosStateInfo(near_tower_x, near_tower.pos.y, near_tower_z)
            return pos
        else:
            basement_pos = StateUtil.BASEMENT_TEAM_1 if hero.team == 1 else StateUtil.BASEMENT_TEAM_0
            return basement_pos

    @staticmethod
    def get_near_towers_in_line(state_info, hero_state, line_idx, distance):
        towers = []
        for unit in state_info.units:
            if int(unit.unit_name) <= 26:
                if StateUtil.if_in_line(unit, line_idx) >= 0:
                    if StateUtil.cal_distance(
                            unit.pos, hero_state.pos) < distance:  # 根据配置得来
                        towers.append(unit)
        return towers

    @staticmethod
    def cal_distance2(pos1, pos2):
        # 忽略y值
        distance = math.sqrt((pos1.x - pos2.x) * (pos1.x - pos2.x) +
                             (pos1.z - pos2.z) * (pos1.z - pos2.z))
        return distance

    @staticmethod
    def cal_distance(pos1, pos2):
        # 忽略y值
        distance = math.sqrt((pos1.x - pos2.x) * (pos1.x - pos2.x) +
                             (pos1.z - pos2.z) * (pos1.z - pos2.z)) / 1000
        return distance

    @staticmethod
    def if_retreat(prev_pos, cur_pos, hero):
        if hero.team == 0 and cur_pos.x < prev_pos.x:
            return True
        if hero.team == 1 and cur_pos.x > prev_pos.x:
            return True
        return False

    @staticmethod
    def mov(direction, revert=False):
        """Map a direction index to one of eight forward vectors.

        Args:
            direction: integer 0-7 selecting the direction. Any value other
                than 0-6 selects the eighth vector.
            revert: when truthy, the x and z components are negated.

        Returns:
            A ``FwdStateInfo`` of length about 1000 in the x/z plane.
        """
        # (x, z) components for direction indices 0-6.
        vectors = {
            0: (1000, 0),
            1: (707, 707),
            2: (0, 1000),
            3: (-707, 707),
            4: (0, -1000),
            5: (-707, -707),
            6: (-1000, 0),
        }
        # The eighth direction must be (707, -707). It used to repeat
        # direction 3's (-707, 707), which made that diagonal unreachable.
        x, z = vectors.get(direction, (707, -707))
        fwd = FwdStateInfo(x, 0, z)

        if revert:
            fwd.x *= -1
            fwd.z *= -1
        return fwd

    @staticmethod
    def build_command(action):
        """Translate an action object into the command dict sent to the game.

        Every command carries ``hero_id`` and ``action``; the remaining keys
        depend on the action type. HOLD and RETREAT are both sent as a MOVE
        to ``action.tgtpos``.

        Raises:
            ValueError: when the action type is unknown, or a MOVE, ATTACK,
                CAST, UPDATE or BUY action lacks its required field.
        """
        kind = action.action

        def _cmd(verb, **fields):
            command = {"hero_id": action.hero_name, "action": verb}
            command.update(fields)
            return command

        if kind == CmdActionEnum.MOVE:
            # A target position takes precedence over a direction.
            if action.tgtpos is not None:
                return _cmd('MOVE', pos=action.tgtpos.to_string())
            if action.fwd is not None:
                return _cmd('MOVE', fwd=action.fwd.to_string())
        if kind == CmdActionEnum.ATTACK and action.tgtid is not None:
            return _cmd('ATTACK', tgtid=str(action.tgtid))
        if kind == CmdActionEnum.CAST and action.skillid is not None:
            command = _cmd('CAST', skillid=str(action.skillid))
            if action.tgtid is not None:
                command['tgtid'] = str(action.tgtid)
            if action.tgtpos is not None:
                command['tgtpos'] = action.tgtpos.to_string()
            if action.fwd:
                command['fwd'] = action.fwd.to_string()
            return command
        if kind == CmdActionEnum.UPDATE and action.skillid is not None:
            return _cmd('UPDATE', skillid=str(action.skillid))
        if kind == CmdActionEnum.BUY and action.itemid is not None:
            return _cmd('BUY', itemid=str(action.itemid))
        if kind == CmdActionEnum.AUTO:
            return _cmd('AUTO')
        if kind == CmdActionEnum.HOLD or kind == CmdActionEnum.RETREAT:
            # HOLD is sent as a move to the given position: if the previous
            # action was an attack, a plain hold would keep attacking.
            return _cmd('MOVE', pos=action.tgtpos.to_string())
        if kind == CmdActionEnum.RESTART:
            return _cmd('RESTART')
        if kind == CmdActionEnum.ADDGOLD:
            return _cmd('ADDGOLD', gold=str(action.gold))
        if kind == CmdActionEnum.ADDLV:
            return _cmd('ADDLV', lv=str(action.lv))
        raise ValueError('unexpected action type ' + str(kind))

    @staticmethod
    def build_action_command(hero_id, action, parameters):
        #todo 这个函数现在只传了一个action进来,但是现在的action里面以及包含了需要的信息了,这个函数需要重写一下
        if action == 'MOVE' and 'pos' in parameters:
            return {
                "hero_id": hero_id,
                "action": action,
                "pos": parameters['pos']
            }
        if action == 'ATTACK' and 'tgtid' in parameters:
            return {
                "hero_id": hero_id,
                "action": action,
                "tgtid": parameters['tgtid']
            }
        if action == 'CAST' and 'skillid' in parameters:
            command = {
                "hero_id": hero_id,
                "action": action,
                "skillid": parameters['skillid']
            }
            if 'tgtid' in parameters:
                command['tgtid'] = parameters['tgtid']
            if 'tgtpos' in parameters:
                command['tgtpos'] = parameters['tgtpos']
            if 'fwd' in parameters:
                command['fwd'] = parameters['fwd']
            return command
        if action == 'UPDATE' and 'skillid' in parameters:
            return {
                "hero_id": hero_id,
                "action": action,
                "skillid": parameters['skillid']
            }
        if action == 'AUTO':
            return {"hero_id": hero_id, "action": action}
        if action == 'HOLD':
            return {"hero_id": hero_id, "action": action}
        if action == 'RESTART':
            return {"hero_id": hero_id, "action": action}
        raise ValueError('unexpected action type ' + action)

    @staticmethod
    def build_action_response(state_info):
        """Build the JSON reply holding one scripted command set per hero.

        For every hero in ``state_info.heros`` the following is emitted, in
        order:

        * an optional 'UPDATE' command for the first upgradable skill, then
        * exactly one of:
            - 'CAST' (first usable skill among ids 1..3) or 'ATTACK', aimed at
              a randomly picked nearby enemy hero/unit, when any is nearby;
            - 'MOVE' towards a fixed, team-dependent position while the tick
              lies strictly between 528 and ``TICK_PER_STATE * 2 * 40``;
            - 'HOLD' otherwise.

        :param state_info: current battle state; ``battleid``, ``tick`` and
            ``heros`` are read from it.
        :return: JSON string with the keys "ID", "tick" and "cmd".
        """
        battle_id = state_info.battleid
        tick = state_info.tick

        commands = []
        for hero in state_info.heros:
            name = hero.hero_name

            # Test code: when some skill can be upgraded, simply upgrade the
            # first one reported.
            upgradable = StateUtil.get_skills_can_upgrade(hero)
            if len(upgradable) > 0:
                commands.append(StateUtil.build_action_command(
                    name, 'UPDATE', {'skillid': str(upgradable[0])}))

            # Collect the enemy heroes and enemy units around this hero.
            enemy_heros = StateUtil.get_nearby_enemy_heros(state_info, name)
            enemy_units = StateUtil.get_nearby_enemy_units(state_info, name)
            hero_count = len(enemy_heros)
            candidate_count = hero_count + len(enemy_units)

            command = None
            if candidate_count > 0:
                # Uniform pick over heroes first, then units.
                pick = randint(0, candidate_count - 1)
                if pick < hero_count:
                    target = enemy_heros[pick]
                    tgtid = target.hero_name
                else:
                    target = enemy_units[pick - hero_count]
                    tgtid = target.unit_name
                tgtpos = target.pos
                fwd = tgtpos.fwd(hero.pos)

                # Skills take priority over a plain attack.
                # Strictly speaking a skill should pass a facing, a target, a
                # target position, or nothing at all depending on its type.
                # ``canuse`` covers silence, a skill level of 0 and cooldown.
                usable = next(
                    (sid for sid in range(1, 4) if hero.skills[sid].canuse),
                    None)
                if usable is not None:
                    command = StateUtil.build_action_command(
                        name, 'CAST', {
                            'skillid': str(usable),
                            'tgtid': tgtid,
                            'tgtpos': tgtpos.to_string(),
                            'fwd': fwd.to_string()
                        })
                if command is None:
                    command = StateUtil.build_action_command(
                        name, 'ATTACK', {'tgtid': tgtid})
            # During the opening phase, send the hero to a fixed position.
            elif 528 < int(tick) < StateUtil.TICK_PER_STATE * 2 * 40:
                destination = ('( -5000, -80, 0)' if hero.team == 0
                               else '( 5000, -80, 0)')
                command = StateUtil.build_action_command(
                    name, 'MOVE', {'pos': destination})
            else:
                command = StateUtil.build_action_command(name, 'HOLD', {})

            commands.append(command)

        return JSON.dumps({"ID": battle_id, "tick": tick, "cmd": commands})

    @staticmethod
    def get_hit_rival_tower_dmg_ratio(cur_state, next_state, next_next_state,
                                      hero_name):
        dmg = 0
        cur_act = cur_state.get_hero_action(hero_name)
        skill_slot = cur_act.skillid
        if cur_act.action == CmdActionEnum.ATTACK:
            # 对于物攻,同样因为伤害有延迟,先检查玩家在下一帧的行动
            # 如果不是物攻,这读取这两帧中间的物攻伤害
            next_act = next_state.get_hero_action(hero_name)
            if next_act is None or next_act.action != CmdActionEnum.ATTACK:
                dmg = next_state.get_hero_tower_dmg(hero_name)
                dmg += next_next_state.get_hero_tower_dmg(hero_name)
            else:
                # 否则只计算当前帧的,简单起见
                dmg = next_state.get_hero_tower_dmg(hero_name)
        return dmg
Example #19
0
 def play_move(pos, fwd, time_second=0.5):
     """Return the position reached from ``pos`` after moving along ``fwd``.

     Each component of ``fwd`` is scaled by ``time_second`` and added to the
     matching component of ``pos``. Reachability of the resulting position is
     not considered.

     :param pos: starting position (``x``, ``y``, ``z`` attributes).
     :param fwd: direction/velocity vector (``x``, ``y``, ``z`` attributes).
     :param time_second: scale factor applied to ``fwd``; defaults to 0.5.
     :return: a new ``PosStateInfo``.
     """
     step_x = time_second * fwd.x
     step_y = time_second * fwd.y
     step_z = time_second * fwd.z
     return PosStateInfo(pos.x + step_x, pos.y + step_y, pos.z + step_z)
Example #20
0
 def move_towards(start_pos, dest_pos, move_dis, distance):
     """Return the point ``move_dis`` along the way from start to destination.

     The movement is interpolated on the x and z axes only; the y component
     of ``start_pos`` is carried over unchanged.

     :param start_pos: starting position (``x``, ``y``, ``z`` attributes).
     :param dest_pos: destination position (``x``, ``z`` attributes are read).
     :param move_dis: distance to travel towards ``dest_pos``.
     :param distance: total distance between ``start_pos`` and ``dest_pos``;
         used as the divisor, so it must be non-zero.
     :return: a new ``PosStateInfo``.
     """
     fraction_of = move_dis  # numerator of the travelled fraction
     final_pos_x = start_pos.x + (dest_pos.x -
                                  start_pos.x) * fraction_of / distance
     # The z delta must be taken between the two z components; the previous
     # code subtracted start_pos.z from dest_pos.y, mixing axes.
     final_pos_z = start_pos.z + (dest_pos.z -
                                  start_pos.z) * fraction_of / distance
     return PosStateInfo(final_pos_x, start_pos.y, final_pos_z)
Example #21
0
 def decode(obj):
     """Build an ``AttackStateInfo`` from a decoded attack record.

     :param obj: mapping describing one attack. The keys 'atker' and 'skill'
         are required; 'defer' and 'tgtpos' are optional.
     :return: ``AttackStateInfo`` whose defender and target position are
         ``None`` when the corresponding key is absent; a present 'tgtpos'
         is converted with ``PosStateInfo.decode``.
     :raises KeyError: if 'atker' or 'skill' is missing.
     """
     attacker = obj['atker']

     defender = None
     if 'defer' in obj:
         defender = obj['defer']

     target_pos = None
     if 'tgtpos' in obj:
         target_pos = PosStateInfo.decode(obj['tgtpos'])

     skill = obj['skill']
     return AttackStateInfo(attacker, defender, target_pos, skill)