def get_action(selected, state_info, hero, hero_name, rival_hero, revert=False):
    """Translate a model output index into the ``CmdAction`` to execute.

    Index layout handled here:
      * 0-7   -- move along the direction returned by ``StateUtil.mov``
      * 8     -- basic attack on the nearest enemy tower
      * 9     -- basic attack on ``rival_hero``
      * 10-17 -- basic attack on nearby enemy unit number ``selected - 10``
      * 18-47 -- cast a skill (ten target slots per skill, skill ids from 1)
      * 48    -- hold at the hero's current position
      * other -- retreat to ``StateUtil.get_retreat_pos`` on line 1

    :param selected: index chosen by the model
    :param state_info: current frame state
    :param hero: hero state object (its ``pos`` is read)
    :param hero_name: name of the hero issuing the command
    :param rival_hero: target id used for the "attack hero" action and for
        skill target selection
    :param revert: forwarded to ``StateUtil.mov`` for move actions
    :return: the ``CmdAction`` for the chosen index
    """

    def _basic_attack(target_id):
        # All basic attacks share the same command shape; only the target differs.
        return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, target_id,
                         None, None, None, selected, None)

    # Movement.
    if selected < 8:
        direction = StateUtil.mov(selected, revert)
        destination = PosStateInfo(hero.pos.x + direction.x * 15,
                                   hero.pos.y + direction.y * 15,
                                   hero.pos.z + direction.z * 15)
        return CmdAction(hero_name, CmdActionEnum.MOVE, None, None,
                         destination, None, None, selected, None)

    # Basic attacks: enemy tower, enemy hero, then enemy creeps.
    if selected == 8:
        nearest_tower = StateUtil.get_nearest_enemy_tower(
            state_info, hero_name, StateUtil.ATTACK_UNIT_RADIUS)
        return _basic_attack(nearest_tower.unit_name)
    if selected == 9:
        return _basic_attack(rival_hero)
    if selected < 18:
        nearby_units = StateUtil.get_nearby_enemy_units(state_info, hero_name)
        return _basic_attack(nearby_units[selected - 10].unit_name)

    # Skills: each skill owns a block of ten consecutive indices.
    if selected < 48:
        skill_offset = selected - 18
        skillid = int(skill_offset / 10 + 1)
        target_slot = skill_offset - (skillid - 1) * 10
        tgtid, tgtpos = LineModel.choose_skill_target(
            target_slot, state_info, skillid, hero_name, hero.pos, rival_hero)
        facing = None if tgtpos is None else tgtpos.fwd(hero.pos)
        return CmdAction(hero_name, CmdActionEnum.CAST, skillid, tgtid,
                         tgtpos, facing, None, selected, None)

    # Hold.
    if selected == 48:
        return CmdAction(hero_name, CmdActionEnum.HOLD, None, None,
                         hero.pos, None, None, 48, None)

    # Retreat.
    fallback_pos = StateUtil.get_retreat_pos(state_info, hero, line_index=1)
    return CmdAction(hero_name, CmdActionEnum.RETREAT, None, None,
                     fallback_pos, None, None, selected, None)
def keep_away_from(state_info, hero_info, rival_hero_info, action_ratios, danger_pos, danger_radius):
    """Disable actions that would take the hero into a danger zone.

    ``action_ratios`` is modified in place: every action considered unsafe
    is set to ``-1`` (entries already equal to ``-1`` are left untouched),
    and a log line is printed for each action that gets disabled.

    Rules applied per action index:
      * 0-7   -- a half-step move that lands within ``danger_radius`` of
                 ``danger_pos`` is disabled
      * 8     -- attacking the tower is always disabled
      * 9     -- attacking the rival hero is disabled when the rival stands
                 inside the danger zone
      * 10-17 -- attacking a creep is disabled when that creep stands
                 inside the danger zone
      * 28-37 -- only for hero cfg id ``'101'``: the skill is disabled when
                 its chosen target position lies inside the danger zone

    :param state_info: current frame state
    :param hero_info: state of the acting hero
    :param rival_hero_info: state of the rival hero
    :param action_ratios: list of per-action scores, ``-1`` meaning disabled
    :param danger_pos: centre of the danger zone
    :param danger_radius: radius of the danger zone
    :return: the same ``action_ratios`` list
    """
    # Nothing to mask: no actions at all, or every action is already disabled.
    if len(action_ratios) == 0 or max(action_ratios) == -1:
        return action_ratios

    def _is_dangerous(pos):
        return StateUtil.cal_distance(pos, danger_pos) <= danger_radius

    def _forbid(index, *details):
        print('策略选择', state_info.battleid, hero_info.hero_name, *details)
        action_ratios[index] = -1

    for selected, ratio in enumerate(action_ratios):
        if ratio == -1:
            continue

        if selected < 8:
            # Probe a short step in the move direction.
            fwd = StateUtil.mov(selected)
            tgtpos = PosStateInfo(hero_info.pos.x + fwd.x * 0.5,
                                  hero_info.pos.y + fwd.y * 0.5,
                                  hero_info.pos.z + fwd.z * 0.5)
            if _is_dangerous(tgtpos):
                _forbid(selected, '移动方向会进入危险区域',
                        hero_info.pos.to_string(), tgtpos.to_string())
        elif selected < 18:
            # Basic attacks on tower / hero / creeps; this check is aimed
            # at melee heroes.
            if selected == 8:
                # Enemy tower.
                _forbid(selected, '不要去攻击塔')
            elif selected == 9:
                # Enemy hero.
                if _is_dangerous(rival_hero_info.pos):
                    _forbid(selected, '不要去近身攻击塔范围内的英雄')
            else:
                # Enemy creep.
                creeps = StateUtil.get_nearby_enemy_units(
                    state_info, hero_info.hero_name)
                target = creeps[selected - 10]
                if _is_dangerous(target.pos):
                    _forbid(selected, '不要去近身攻击塔范围内的小兵')
        elif hero_info.cfg_id == '101' and 28 <= selected < 38:
            # Special case for this hero's jump skill.
            skillid = int((selected - 18) / 10 + 1)
            _, tgtpos = LineModel.choose_skill_target(
                selected - 18 - (skillid - 1) * 10, state_info, skillid,
                hero_info.hero_name, hero_info.pos, rival_hero_info.hero_name)
            if tgtpos is not None and _is_dangerous(tgtpos):
                _forbid(selected, '跳跃技能在朝着塔下的目标')

    return action_ratios
def __init__(self, hero_name, action, skillid, tgtid, tgtpos, fwd, itemid, output_index, reward, vpred=0, avail_action=True):
    """Store the fields that describe one hero command.

    ``skillid`` and ``tgtid`` are stored as strings (``None`` becomes
    ``'None'``). A non-``None`` ``tgtpos`` is copied into a new
    ``PosStateInfo`` whose coordinates are truncated to integers.
    ``gold`` and ``lv`` start at 0.
    """
    # Target positions must be integers, so rebuild the position explicitly.
    if tgtpos is None:
        int_tgtpos = None
    else:
        int_tgtpos = PosStateInfo(int(tgtpos.x), int(tgtpos.y), int(tgtpos.z))

    self.hero_name = hero_name
    self.action = action
    self.skillid = str(skillid)
    self.tgtid = str(tgtid)
    self.tgtpos = int_tgtpos
    self.fwd = fwd
    self.itemid = itemid
    self.output_index = output_index
    self.reward = reward

    # Used by PPO.
    self.vpred = vpred
    self.avail_action = avail_action

    self.gold = 0
    self.lv = 0
def set_move_target(hero_info, fwd, time_second=0.5):
    """Return a move target ahead of the hero along ``fwd``.

    The x and z offsets are ``time_second * fwd * speed / 1000 * 3``; the
    y coordinate of the result is fixed at -80.

    :param hero_info: hero state (``pos`` and ``speed`` are read)
    :param fwd: direction with ``x`` and ``z`` components
    :param time_second: time horizon used to scale the offset
    :return: a new ``PosStateInfo``
    """
    origin = hero_info.pos
    speed = hero_info.speed
    target_x = origin.x + time_second * fwd.x * speed / 1000 * 3
    target_z = origin.z + time_second * fwd.z * speed / 1000 * 3
    return PosStateInfo(target_x, -80, target_z)
def decode(obj, unit_name):
    """Build a ``UnitStateInfo`` from a decoded JSON mapping.

    Keys missing from ``obj`` become ``None``. When ``obj`` has no
    ``'team'`` key, the team is derived from the position: ``None`` without
    a position, otherwise 0 for ``pos.x < 0`` and 1 for everything else.

    :param obj: mapping with the unit's raw fields
    :param unit_name: name to give the unit
    :return: the decoded ``UnitStateInfo``
    """
    pos = PosStateInfo.decode(obj['pos']) if 'pos' in obj else None
    fwd = FwdStateInfo.decode(obj['fwd']) if 'fwd' in obj else None

    # Plain pass-through fields, listed in constructor order.
    plain_keys = ('hp', 'maxhp', 'speed', 'moving', 'chrtype', 'att',
                  'attspeed', 'mag', 'attpen', 'magpen', 'attpenrate',
                  'magpenrate', 'movelock', 'vis1', 'vis2', 'vis3')
    plain_values = [obj.get(key) for key in plain_keys]

    if 'team' in obj:
        team = obj['team']
    elif pos is None:
        team = None
    else:
        team = 0 if pos.x < 0 else 1

    return UnitStateInfo(unit_name, obj.get('state'), obj.get('cfgID'),
                         pos, fwd, *plain_values, team)
def cal_soldier_wave_point(state_info, unit_index_list):
    """Return the average x/z position of the given units.

    Each name in ``unit_index_list`` is looked up with
    ``state_info.get_unit``. The averaged coordinates are truncated to
    integers and the y coordinate of the result is fixed at -80.

    :param state_info: current frame state
    :param unit_index_list: names of the units to average over
    :return: a new ``PosStateInfo``
    """
    positions = [state_info.get_unit(name).pos for name in unit_index_list]
    total_x = sum(p.x for p in positions)
    total_z = sum(p.z for p in positions)
    count = len(unit_index_list)
    return PosStateInfo(int(total_x / count), int(-80), int(total_z / count))
def get_hp_restore_place(state_info, hero):
    """Return a position behind the first friendly unit at x = +/-17110.

    Units are scanned in order; the first one on the hero's team whose x
    coordinate is 17110 or -17110 is used. The returned position is offset
    3000 on x (minus for team 0, plus otherwise) and keeps the unit's y/z.

    :param state_info: current frame state (``units`` is iterated)
    :param hero: hero state (``team`` is read)
    :return: a ``PosStateInfo``, or ``None`` when no unit matches
    """
    for candidate in state_info.units:
        if candidate.team != hero.team:
            continue
        if candidate.pos.x not in (17110, -17110):
            continue
        # Move to the rear side of the tower.
        x_shift = -3000 if hero.team == 0 else 3000
        return PosStateInfo(candidate.pos.x + x_shift, candidate.pos.y,
                            candidate.pos.z)
    return None
def decode(obj):
    """Build a ``CmdAction`` from a decoded JSON mapping.

    ``'hero_name'`` and ``'action'`` are required. Every other key is
    optional and becomes ``None`` when absent, including ``'vpred'``, which
    is then passed on explicitly as ``None``.

    :param obj: mapping with the command's raw fields
    :return: the decoded ``CmdAction``
    """
    hero_name = obj['hero_name']
    action = obj['action']

    # Nested objects are decoded only when their key is present.
    tgtpos = None
    if 'tgtpos' in obj:
        tgtpos = PosStateInfo.decode(obj['tgtpos'])
    fwd = None
    if 'fwd' in obj:
        fwd = FwdStateInfo.decode(obj['fwd'])

    return CmdAction(hero_name, action, obj.get('skillid'), obj.get('tgtid'),
                     tgtpos, fwd, obj.get('itemid'), obj.get('output_index'),
                     obj.get('reward'), obj.get('vpred'))
def policy_move_retreat(hero_info):
    """Build a MOVE command that walks the hero in its retreat direction.

    Team 0 uses move index 6; every other team uses move index 0. The
    target is 15 direction-units away from the hero's current position.

    :param hero_info: hero state (``team``, ``pos`` and ``hero_name`` are read)
    :return: the MOVE ``CmdAction``
    """
    mov_idx = 6 if hero_info.team == 0 else 0
    direction = StateUtil.mov(mov_idx)
    here = hero_info.pos
    destination = PosStateInfo(here.x + direction.x * 15,
                               here.y + direction.y * 15,
                               here.z + direction.z * 15)
    return CmdAction(hero_info.hero_name, CmdActionEnum.MOVE, None, None,
                     destination, None, None, mov_idx, None)
def get_attack_tower_action(hero_name, hero_info, tower_unit):
    """Attack the tower when in reach, otherwise move towards it.

    The model detects towers from fairly far away, so the tower may still
    be out of attack reach; in that case the hero first walks closer. The
    move is expressed with a target position rather than a direction,
    because the direction coordinate system of move commands is awkward.

    :param hero_name: name of the hero issuing the command
    :param hero_info: hero state (``pos`` is read)
    :param tower_unit: tower to attack (``pos`` and ``unit_name`` are read)
    :return: a MOVE ``CmdAction`` when farther than
        ``StateUtil.ATTACK_UNIT_RADIUS``, otherwise an ATTACK ``CmdAction``
        with output index 11
    """
    too_far = StateUtil.cal_distance(
        hero_info.pos, tower_unit.pos) > StateUtil.ATTACK_UNIT_RADIUS

    if not too_far:
        return CmdAction(hero_name, CmdActionEnum.ATTACK, 0,
                         tower_unit.unit_name, None, None, None, 11, None)

    # Snap the exact direction to the closest supported move direction.
    exact_fwd = tower_unit.pos.fwd(hero_info.pos)
    fwd, output_index = Replayer.get_closest_fwd(exact_fwd)
    tgtpos = PosStateInfo(hero_info.pos.x + fwd.x * 15,
                          hero_info.pos.y + fwd.y * 15,
                          hero_info.pos.z + fwd.z * 15)
    print("朝塔移动,", hero_name, "hero_pos", hero_info.pos.to_string(),
          "tower_pos", tower_unit.pos.to_string(), "fwd", fwd.to_string(),
          "output_index", output_index)
    return CmdAction(hero_name, CmdActionEnum.MOVE, None, None, tgtpos,
                     None, None, output_index, None)
class CmdAction(object):
    """One command issued for a hero, with JSON encode/decode helpers."""

    def __init__(self, hero_name, action, skillid, tgtid, tgtpos, fwd,
                 itemid, output_index, reward, vpred=0, avail_action=True):
        """Store the command fields.

        ``skillid`` and ``tgtid`` are stored as strings (``None`` becomes
        ``'None'``). A non-``None`` ``tgtpos`` is copied into a new
        ``PosStateInfo`` whose coordinates are truncated to integers.
        ``gold`` and ``lv`` start at 0.
        """
        # Target positions must be integers, so rebuild the position.
        if tgtpos is None:
            int_tgtpos = None
        else:
            int_tgtpos = PosStateInfo(int(tgtpos.x), int(tgtpos.y),
                                      int(tgtpos.z))

        self.hero_name = hero_name
        self.action = action
        self.skillid = str(skillid)
        self.tgtid = str(tgtid)
        self.tgtpos = int_tgtpos
        self.fwd = fwd
        self.itemid = itemid
        self.output_index = output_index
        self.reward = reward

        # Used by PPO.
        self.vpred = vpred
        self.avail_action = avail_action

        self.gold = 0
        self.lv = 0

    @staticmethod
    def decode(obj):
        """Build a ``CmdAction`` from a decoded JSON mapping.

        ``'hero_name'`` and ``'action'`` are required; all other keys are
        optional and become ``None`` when absent (including ``'vpred'``).
        """
        hero_name = obj['hero_name']
        action = obj['action']

        # Nested objects are decoded only when their key is present.
        tgtpos = None
        if 'tgtpos' in obj:
            tgtpos = PosStateInfo.decode(obj['tgtpos'])
        fwd = None
        if 'fwd' in obj:
            fwd = FwdStateInfo.decode(obj['fwd'])

        return CmdAction(hero_name, action, obj.get('skillid'),
                         obj.get('tgtid'), tgtpos, fwd, obj.get('itemid'),
                         obj.get('output_index'), obj.get('reward'),
                         obj.get('vpred'))

    def encode(self):
        """Return the command as a dict, leaving out ``None`` values.

        ``tgtpos`` and ``fwd`` are added through their own ``encode()``
        when they are set.
        """
        fields = {
            'hero_name': self.hero_name,
            'action': self.action,
            'skillid': self.skillid,
            'tgtid': self.tgtid,
            'itemid': self.itemid,
            'output_index': self.output_index,
            'reward': self.reward,
            'vpred': self.vpred,
        }
        if self.tgtpos is not None:
            fields['tgtpos'] = self.tgtpos.encode()
        if self.fwd is not None:
            fields['fwd'] = self.fwd.encode()
        return {key: value for key, value in fields.items()
                if value is not None}
def get_retreat_pos(state_info, hero, line_index):
    """Return where the hero should retreat to on the given line.

    Candidate towers are friendly towers for which
    ``StateUtil.if_in_line(unit, line_index) >= 0`` and that lie behind the
    hero (smaller x for team 0, larger x for team 1). The closest candidate
    by x distance is chosen, and the result is a point behind it: 3000 on x
    and 2000 on z, subtracted for team 0 and added otherwise. Without any
    candidate the team's basement position is returned.

    :param state_info: current frame state (``units`` is iterated)
    :param hero: hero state (``team`` and ``pos`` are read)
    :param line_index: line passed to ``StateUtil.if_in_line``
    :return: a ``PosStateInfo`` behind the tower, or
        ``StateUtil.BASEMENT_TEAM_1`` / ``StateUtil.BASEMENT_TEAM_0``
    """

    def _is_behind_hero(unit):
        if hero.team == 0:
            return hero.pos.x > unit.pos.x
        if hero.team == 1:
            return hero.pos.x < unit.pos.x
        return False

    towers_behind = []
    for unit in state_info.units:
        if not (StateUtil.if_unit_tower(unit.unit_name)
                and unit.team == hero.team):
            continue
        if StateUtil.if_in_line(unit, line_index) < 0:
            continue
        # Only towers behind the hero qualify.
        if _is_behind_hero(unit):
            towers_behind.append(unit)

    if not towers_behind:
        if hero.team == 1:
            return StateUtil.BASEMENT_TEAM_1
        return StateUtil.BASEMENT_TEAM_0

    # On ties the first tower in scan order wins.
    near_tower = min(towers_behind,
                     key=lambda t: math.fabs(hero.pos.x - t.pos.x))

    # Move to the rear side of the tower.
    if hero.team == 0:
        behind_x = near_tower.pos.x - 3000
        behind_z = near_tower.pos.z - 2000
    else:
        behind_x = near_tower.pos.x + 3000
        behind_z = near_tower.pos.z + 2000
    return PosStateInfo(behind_x, near_tower.pos.y, behind_z)
def play_move(hero_info, fwd, time_second=0.5):
    """Return the position the hero is estimated to reach moving along ``fwd``.

    The x and z offsets are ``time_second * fwd * speed / 1000 * 1.2``; the
    y coordinate of the result is fixed at -80.

    :param hero_info: hero state (``pos`` and ``speed`` are read)
    :param fwd: direction with ``x`` and ``z`` components
    :param time_second: time horizon used to scale the offset
    :return: a new ``PosStateInfo``
    """
    origin = hero_info.pos
    speed = hero_info.speed
    reached_x = origin.x + time_second * fwd.x * speed / 1000 * 1.2
    reached_z = origin.z + time_second * fwd.z * speed / 1000 * 1.2
    return PosStateInfo(reached_x, -80, reached_z)
def decode(obj, hero_name):
    """Build a ``HeroStateInfo`` from a decoded JSON mapping.

    Scalar keys missing from ``obj`` become ``None`` and ``'buff'``
    defaults to an empty list. Equipment slots ``equip0``..``equip7`` are
    decoded when present. Skills ``Skill0``..``Skill8`` are decoded with
    ``HeroStateInfo.decode_add_skill`` and kept when the result is not
    ``None``.

    When ``obj`` has no ``'team'`` key, the team is derived from the
    position: ``None`` without a position, otherwise 0 for ``pos.x < 0``
    and 1 for everything else.

    :param obj: mapping with the hero's raw fields
    :param hero_name: name to give the hero
    :return: the decoded ``HeroStateInfo``
    """
    read = obj.get

    pos = PosStateInfo.decode(obj['pos']) if 'pos' in obj else None
    fwd = FwdStateInfo.decode(obj['fwd']) if 'fwd' in obj else None

    # TODO: should mp default to 0 when the field is missing?
    mp = read('mp')

    equips = []
    for slot in range(8):
        slot_key = 'equip%d' % slot
        if slot_key in obj:
            equips.append(EquipStateInfo.decode(obj[slot_key], slot_key))

    buffs = read('buff', [])

    skills = []
    for slot in range(9):
        skill_info = HeroStateInfo.decode_add_skill(obj, 'Skill%d' % slot)
        if skill_info is not None:
            skills.append(skill_info)

    # The team is inferred from the starting position. This is only sound
    # on the first frame; later frames should get it from the merged state.
    # Top lane is team 0, bottom lane is team 1.
    if 'team' in obj:
        team = obj['team']
    elif pos is None:
        team = None
    else:
        team = 0 if pos.x < 0 else 1

    return HeroStateInfo(
        hero_name, read('state'), read('cfgID'), pos, fwd,
        read('hp'), read('maxhp'), mp, read('maxmp'),
        read('speed'), read('att'), read('gold'), read('Hprec'),
        equips, buffs, skills,
        # Visibility flags (bottom-lane side, top-lane side, neutrals).
        read('vis1'), read('vis2'), read('vis3'),
        # Fields added in a later protocol update.
        read('attspeed'), read('mag'), read('attpen'), read('magpen'),
        read('attpenrate'), read('magpenrate'), read('movelock'),
        team)
def get_tower_behind(tower_info, hero, line_index):
    """Return the point 4000 behind a tower on the x axis.

    The offset is subtracted for team 0 and added for any other team; the
    tower's y and z are kept. ``line_index`` is accepted but not used.

    :param tower_info: tower state (``pos`` is read)
    :param hero: hero state (``team`` is read)
    :param line_index: unused
    :return: a new ``PosStateInfo``
    """
    tower_pos = tower_info.pos
    if hero.team == 0:
        behind_x = tower_pos.x - 4000
    else:
        behind_x = tower_pos.x + 4000
    return PosStateInfo(behind_x, tower_pos.y, tower_pos.z)
class TeamBattleTrainer: BATTLE_POINT_X = 0 BATTLE_POINT_Z = -31000 BATTLE_CIRCLE = PosStateInfo(BATTLE_POINT_X, 0, BATTLE_POINT_Z) BATTLE_CIRCLE_RADIUS_BATTLE_START = 8 BATTLE_CIRCLE_RADIUS_BATTLE_ING = 10 SHRINK_TIME = 60 def __init__(self, act_size, save_root_path, battle_id, model_util, gamma, enable_policy): self.act_size = act_size self.battle_id = battle_id self.model_util = model_util self.state_cache = [] self.heros = [ '27', '28', '29', '30', '31', '32', '33', '34', '35', '36' ] self.raw_log_file = open( save_root_path + '/raw_' + str(battle_id) + '.log', 'w') self.dead_heroes = [] self.battle_started = -1 self.model_caches = {} self.rebooting = False self.enable_policy = enable_policy for hero in self.heros: self.model_caches[hero] = TEAM_PPO_CACHE(gamma) # 计算奖励值时候因为要看历史数据,所以需要这两个当时的状态信息。后续可以考虑如何避免这种缓存 self.battle_heroes_cache = [] self.dead_heroes_cache = [] self.data_inputs = [] def save_raw_log(self, raw_log_str): self.raw_log_file.write( strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " -- " + raw_log_str + "\n") self.raw_log_file.flush() def build_response(self, raw_state_str): self.save_raw_log(raw_state_str) prev_state_info = self.state_cache[-1] if len( self.state_cache) > 0 else None response_strs = [] # 解析客户端发送的请求 obj = JSON.loads(raw_state_str) raw_state_info = StateInfo.decode(obj) # 重开时候会有以下报文 {"wldstatic":{"ID":9051},"wldruntime":{"State":0}} if raw_state_info.tick == -1: return {"ID": raw_state_info.battleid, "tick": -1} if raw_state_info.tick <= StateUtil.TICK_PER_STATE and ( prev_state_info is None or prev_state_info.tick > raw_state_info.tick): print("clear") prev_state_info = None self.state_cache = [] self.battle_started = -1 self.battle_heroes_cache = [] self.dead_heroes = [] self.dead_heroes_cache = [] self.data_inputs = [] self.rebooting = False elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE: # 不是开始帧的话直接返回重启游戏 # 还有偶然情况下首帧没有tick(即-1)的情况,这种情况下只能重启本场战斗 print("battle_id", self.battle_id, "tick", 
raw_state_info.tick, '不是开始帧的话直接返回重启游戏', raw_state_info.tick) action_strs = [ StateUtil.build_action_command('27', 'RESTART', None) ] rsp_obj = { "ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs } rsp_str = JSON.dumps(rsp_obj) return rsp_str state_info = StateUtil.update_state_log(prev_state_info, raw_state_info) hero = state_info.get_hero("27") if hero is None or hero.hp is None: # 偶然情况处理,如果找不到英雄,直接重开 print("battle_id", self.battle_id, "tick", state_info.tick, '不是开始帧的话直接返回重启游戏', raw_state_info.tick) action_strs = [ StateUtil.build_action_command('27', 'RESTART', None) ] rsp_obj = { "ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs } rsp_str = JSON.dumps(rsp_obj) return rsp_str # 战斗前准备工作 if len(self.state_cache) == 0: # 第一帧的时候,添加金钱和等级 for hero in self.heros: add_gold_cmd = CmdAction(hero, CmdActionEnum.ADDGOLD, None, None, None, None, None, None, None) add_gold_cmd.gold = 3000 add_gold_str = StateUtil.build_command(add_gold_cmd) response_strs.append(add_gold_str) add_lv_cmd = CmdAction(hero, CmdActionEnum.ADDLV, None, None, None, None, None, None, None) add_lv_cmd.lv = 9 add_lv_str = StateUtil.build_command(add_lv_cmd) response_strs.append(add_lv_str) elif len(self.state_cache) > 1: # 第二帧时候开始,升级技能,购买装备,这个操作可能会持续好几帧 for hero in self.heros: upgrade_cmd = self.upgrade_skills(state_info, hero) if upgrade_cmd is not None: response_strs.append(upgrade_cmd) buy_cmd = self.buy_equip(state_info, hero) if buy_cmd is not None: response_strs.append(buy_cmd) for hero in self.heros: # 判断是否英雄死亡 if prev_state_info is not None: dead = StateUtil.if_hero_dead(prev_state_info, state_info, hero) if dead == 1 and hero not in self.dead_heroes: print("battle_id", self.battle_id, "tick", state_info.tick, "英雄死亡", hero, "tick", state_info.tick) self.dead_heroes.append(hero) # 首先要求所有英雄站到团战圈内,然后开始模型计算,这时候所有的行动都有模型来决定 # 需要过滤掉无效的行动,同时屏蔽会离开战斗圈的移动 #TODO 开始团战后,如果有偶尔的技能移动会离开圈,则拉回来 # 这里会排除掉死亡的英雄,他们不需要再加入团战 # 团战范围在收缩 battle_range = 
self.cal_battle_range( len(self.state_cache) - self.battle_started) heroes_in_range, heroes_out_range = TeamBattleTrainer.all_in_battle_range( state_info, self.heros, self.dead_heroes, battle_range) # 存活英雄 battle_heros = list(heroes_in_range) battle_heros.extend(heroes_out_range) # 缓存参战情况和死亡情况,用于后续训练 self.battle_heroes_cache.append(battle_heros) self.dead_heroes_cache.append(list(self.dead_heroes)) if state_info.tick >= 142560: debuginfo = True # 团战还没有开始,有英雄还在圈外 if len(heroes_out_range) > 0: if self.battle_started > -1: print('battle_id', self.battle_id, "战斗已经开始,但是为什么还有英雄在团战圈外", ','.join(heroes_out_range), "battle_range", battle_range) # 移动到两个开始战斗地点附近 # 如果是团战开始之后,移动到团战中心点 for hero in heroes_out_range: start_point_x = randint(0, 8000) start_point_z = TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START * 1000 if self.battle_started == -1 else 0 start_point_z += randint(-4000, 4000) if TeamBattleUtil.get_hero_team(hero) == 0: start_point_z *= -1 start_point_z += TeamBattleTrainer.BATTLE_POINT_Z tgt_pos = PosStateInfo(start_point_x, 0, start_point_z) move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None, tgt_pos, None, None, None, None) mov_cmd_str = StateUtil.build_command(move_action) response_strs.append(mov_cmd_str) # 团战已经开始 elif not self.rebooting: if self.battle_started == -1: self.battle_started = len(self.state_cache) # 对特殊情况。比如德古拉使用大招hp会变1,修改帧状态 state_info, _ = TeamBattlePolicy.modify_status_4_draculas_invincible( state_info, self.state_cache) # action_cmds, input_list, model_upgrade = self.get_model_actions(state_info, heroes_in_range) # 跟队伍,每个队伍得到行为 team_a, team_b = TeamBattleUtil.get_teams(heroes_in_range) team_actions_a, input_list_a, model_upgrade_a = self.get_model_actions_team( state_info, team_a, heroes_in_range) team_actions_b, input_list_b, model_upgrade_b = self.get_model_actions_team( state_info, team_b, heroes_in_range) # 如果模型已经开战,重启战斗 if (model_upgrade_a or model_upgrade_b ) and self.battle_started < len(self.state_cache) + 1: 
print("battle_id", self.battle_id, "因为模型升级,重启战斗", self.battle_started, len(self.state_cache)) action_strs = [ StateUtil.build_action_command('27', 'RESTART', None) ] rsp_obj = { "ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs } rsp_str = JSON.dumps(rsp_obj) return rsp_str data_input_map = {} for action_cmd, data_input in zip(team_actions_a + team_actions_b, input_list_a + input_list_b): action_str = StateUtil.build_command(action_cmd) response_strs.append(action_str) state_info.add_action(action_cmd) data_input_map[action_cmd.hero_name] = data_input # 缓存所有的模型输入,用于后续训练 self.data_inputs.append(data_input_map) # 添加记录到缓存中 self.state_cache.append(state_info) # 将模型行为加入训练缓存,同时计算奖励值 # 注意:因为奖励值需要看后续状态,所以这个计算会有延迟 last_x_index = 2 if self.battle_started > -1 and len(self.data_inputs) >= last_x_index: if self.rebooting: # 测试发现重启指令发出之后,可能下一帧还没开始重启战斗,这种情况下抛弃训练 print("battle_id", self.battle_id, "tick", state_info.tick, "warn", "要求重启战斗,但是还在收到后续帧状态, 继续重启") # 重启游戏 response_strs = [ StateUtil.build_action_command('27', 'RESTART', None) ] else: state_index = len(self.state_cache) - last_x_index win, win_team, left_heroes = self.remember_replay_heroes( -last_x_index, state_index, battle_range) # 团战结束条件 # 首先战至最后一人 # all_in_team = TeamBattleUtil.all_in_one_team(heroes_in_range) # if self.battle_started: # if len(self.dead_heroes) >= 9 or (len(self.dead_heroes) >= 5 and all_in_team > -1): if win == 1: # 重启游戏 print('battle_id', self.battle_id, "重启游戏", "剩余人员", ','.join(left_heroes)) response_strs = [ StateUtil.build_action_command('27', 'RESTART', None) ] self.rebooting = True # battle_heros = self.search_team_battle(state_info) # if len(battle_heros) > 0: # print("team battle heros", ';'.join(battle_heros)) # # heros_need_model = [] # for hero in self.heros: # # 判断是否英雄死亡 # if prev_state_info is not None: # dead = StateUtil.if_hero_dead(prev_state_info, state_info, hero) # if dead == 1 and hero not in self.dead_heroes: # self.dead_heroes.append(hero) # # # 
复活的英雄不要再去参团 # if hero in self.dead_heroes: # continue # # # near_enemy_heroes = StateUtil.get_nearby_enemy_heros(state_info, hero, TeamBattleTrainer.MODEL_RANGE) # if hero not in battle_heros: # # 移动到团战点附近,添加部分随机 # rdm_delta_x = randint(0, 1000) # rdm_delta_z = randint(0, 1000) # tgt_pos = PosStateInfo(TeamBattleTrainer.BATTLE_POINT_X + rdm_delta_x, 0, TeamBattleTrainer.BATTLE_POINT_Z + rdm_delta_z) # move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None, tgt_pos, None, None, None, None) # mov_cmd_str = StateUtil.build_command(move_action) # response_strs.append(mov_cmd_str) # else: # # 启动模型决策 # heros_need_model.append(hero) # # if len(heros_need_model) > 0: # action_cmds = self.get_model_actions(state_info, heros_need_model) # for action_cmd in action_cmds: # action_str = StateUtil.build_command(action_cmd) # response_strs.append(action_str) # state_info.add_action(action_cmd) #TODO 记录模型输出,用于后续训练 # 返回结果给游戏端 rsp_obj = { "ID": state_info.battleid, "tick": state_info.tick, "cmd": response_strs } rsp_str = JSON.dumps(rsp_obj) print('battle_id', self.battle_id, 'response', rsp_str) return rsp_str def cal_battle_range(self, action_times): battle_range = TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START if self.battle_started == -1 \ else TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_ING - int(action_times / TeamBattleTrainer.SHRINK_TIME) return battle_range # last_x_index 表示这是倒数第x个状态,这里不用准确数字而是用-1、-2是因为state_cache,data_inputs长度不同 # state_index 表示状态在帧缓存中的位置,用于计算奖励值折旧时候使用 def remember_replay_heroes(self, last_x_index, state_index, battle_range): prev_state = self.state_cache[last_x_index - 1] state_info = self.state_cache[last_x_index] next_state = self.state_cache[last_x_index + 1] battle_heroes = self.battle_heroes_cache[last_x_index] dead_heroes = self.dead_heroes_cache[last_x_index] data_input_map = self.data_inputs[last_x_index] # 计算奖励值情况 state_info, win, win_team, left_heroes = self.model_util.cal_rewards( prev_state, state_info, next_state, battle_heroes, 
dead_heroes) print("battle_id", self.battle_id, "tick", state_info.tick, "remember_replay_heroes", "win", win, "剩余人员", ','.join(left_heroes), "输入—战斗人员", ','.join(battle_heroes), "输入—阵亡人员", ','.join(dead_heroes)) # 设置一场战斗的最大游戏时长,到时直接重启,所有玩家最终奖励为零,没有输赢 if win == 0 and battle_range <= 0: print('battle_id', self.battle_id, "到达游戏最大时长,直接重启,需要确认是否有异常情况") win = 1 for action in state_info.actions: # 行为有可能为空,比如英雄已经挂了,但是他最后的动作在后续几帧都可能有影响,也有可能是因为 # print('battle_id', self.battle_id, "remember_replay_heroes", action.hero_name) data_input = data_input_map[ action. hero_name] if action.action != CmdActionEnum.EMPTY else None self.remember_train_data(state_info, state_index, data_input, action.hero_name, win) # 如果战斗结束,需要训练所有模型 if win == 1: for hero_name in self.heros: model_cache = self.model_caches[hero_name] o4r, batch_size = model_cache.output4replay() # 提交给训练模块 print('battle_id', self.battle_id, 'trainer', hero_name, '添加训练集', batch_size) if o4r is None: print('battle_id', self.battle_id, "训练数据异常") else: self.model_util.set_train_data(hero_name, self.battle_id, o4r, batch_size) model_cache.clear_cache() return win, win_team, left_heroes # 保存训练数据,计算行为奖励,触发训练 #TODO 在游戏重启时候需要同时训练所有的模型 def remember_train_data(self, state_info, state_index, data_input, hero_name, new): hero_act = state_info.get_hero_action(hero_name) model_cache = self.model_caches[hero_name] if hero_act is not None: if hero_act.reward is None: print("Error", 'battle_id', self.battle_id, hero_act.hero_name, hero_act.action, hero_act.skillid) return # prev_new 简单计算,可能会有问题 prev_new = model_cache.get_prev_new() ob = data_input ac = hero_act.output_index vpred = hero_act.vpred rew = hero_act.reward model_cache.remember(ob, ac, vpred, new, rew, prev_new, state_index, self.battle_id, hero_name) @staticmethod def all_in_battle_range(state_info, all_heroes, dead_heroes, battle_range): heroes_in = [] heroes_out = [] for hero in all_heroes: if hero not in dead_heroes: hero_info = state_info.get_hero(hero) dis = 
TeamBattleTrainer.in_battle_range( hero_info.pos, battle_range) if dis != -1: heroes_out.append(hero) # print('battle_id', state_info.battleid, "all_in_battle_range", "found hero not in circle", hero, "battle_range", battle_range, "distance", dis) else: heroes_in.append(hero) return heroes_in, heroes_out # 考察一个英雄是否在团战圈中 @staticmethod def in_battle_range(pos, battle_range): dis = StateUtil.cal_distance2(pos, TeamBattleTrainer.BATTLE_CIRCLE) if dis < battle_range * 1000 + 500: return -1 return dis def search_team_battle(self, state_info): max_team = set() for hero in self.heros: battle_heros = self.search_team_battle_hero(state_info, hero) if len(battle_heros) > 1 and len(battle_heros) > len(max_team): max_team = battle_heros return max_team def search_team_battle_hero(self, state_info, hero): # 检查是否有团战,并且得到团战的范围内所有的单位 # 团战范围的定义 # 首先从一个英雄开始找起,如果它周围有敌人,就把敌人和自己人全都列为范围内,然后用新的人物继续寻找 # 注:这里只找一个开团点 checked_heros = set() team_battle_heros = set() # 找到第一个周围有敌人的 team_battle_heros.add(hero) while len(checked_heros) < len(team_battle_heros): for hero in team_battle_heros.copy(): if hero not in checked_heros: near_enemy_heroes = StateUtil.get_nearby_enemy_heros( state_info, hero, TeamBattleTrainer.MODEL_RANGE) for enemy in near_enemy_heroes: team_battle_heros.add(enemy.hero_name) checked_heros.add(hero) return team_battle_heros def get_model_actions_team(self, state_info, team, battle_heroes, debug=False): # 第一个人先选,然后第二个人,一直往后,后面的人会在参数中添加上之前人的行为 # 同时可以变成按照模型给出maxq大小来决定谁先选 # 这样的好处是所有人选择的行为就是最后执行的行为 # 暂时为随机英雄先选 # first_hero = heroes[0] # 得到当前团战范围,因为会收缩 battle_range = self.cal_battle_range( len(self.state_cache) - self.battle_started) # 首先得到当前情况下每个英雄的基础输入集和所有无效的选择 hero_input_map = {} hero_unavail_list_map = {} for hero in team: data_input = TeamBattleInput.gen_input(state_info, hero, battle_heroes) data_input = np.array(data_input) hero_input_map[hero] = data_input unaval_list = TeamBattleTrainer.list_unaval_actions( self.act_size, state_info, hero, battle_heroes, battle_range) 
unaval_list_str = ' '.join( str("%.4f" % float(act)) for act in unaval_list) hero_unavail_list_map[hero] = unaval_list if debug: print("battle_id", self.battle_id, "tick", state_info.tick, "hero", hero, "model remove_unaval_actions", unaval_list_str) # 得到每个英雄的推荐行为 hero_recommend_list_map = {} for hero in team: friends, opponents = TeamBattleUtil.get_friend_opponent_heros( battle_heroes, hero) hero_info = state_info.get_hero(hero) recommend_list = TeamBattlePolicy.select_action_by_strategy( state_info, hero_info, friends, opponents) hero_recommend_list_map[hero] = recommend_list # 开始挑选英雄行为,每次根据剩余英雄的最优选择,根据Q大小来排序 action_cmds = [] input_list = [] left_heroes = list(team) model_upgrade = False while len(left_heroes) > 0: cur_max_q = -1 chosen_hero = left_heroes[0] chosen_action_list = None for hero in left_heroes: # 对于之前的英雄行为,加入输入 hero_info = state_info.get_hero(hero) data_input = hero_input_map[hero] for prev_action in action_cmds: data_input = TeamBattleInput.add_other_hero_action( data_input, hero_info, prev_action, debug) unaval_list = hero_unavail_list_map[hero] recommend_list = hero_recommend_list_map[hero] action_list, explor_value, vpreds, clear_cache = self.model_util.get_action_list( self.battle_id, hero, data_input) action_str = ' '.join( str("%.4f" % float(act)) for act in action_list) max_q = TeamBattleTrainer.get_max_q(action_list, unaval_list, recommend_list) if debug: print("battle_id", self.battle_id, "tick", state_info.tick, "本轮行为候选", "hero", hero, "max_q", max_q, "model action list", action_str) # 允许等于是为了支持max_q等于-1的情况 if max_q >= cur_max_q: cur_max_q = max_q chosen_hero = hero chosen_action_list = action_list # 如果模型升级了,需要清空所有缓存用作训练的行为,并且重启游戏 if clear_cache: print('battle_id', self.battle_id, '模型升级,清空训练缓存') for hero_name in self.heros: self.model_caches[hero_name].clear_cache() model_upgrade = True # 使用最大q的英雄的行为 unaval_list = hero_unavail_list_map[chosen_hero] recommend_list = hero_recommend_list_map[hero] friends, opponents = 
def get_model_actions(self, state_info, heros, debug=False):
    """Pick one model-driven action per hero, visiting heroes in random order.

    Heroes choose sequentially; the model input of each later hero is
    extended with the actions already chosen by the earlier ones.

    Args:
        state_info: current frame state.
        heros: names of the heroes that take part in the fight.
        debug: print the per-hero Q values and decisions.

    Returns:
        ``(action_cmds, input_list, model_upgrade)``: the chosen commands,
        the model input used for each of them (same order) and whether the
        model reported ``clear_cache`` for any hero during this call.
    """
    # TODO: order the heroes by the model's max Q instead of randomly, so
    # that what every hero selects is exactly what gets executed.
    random_heros = list(heros)
    shuffle(random_heros)

    # The battle circle shrinks over time, so recompute it for this frame.
    battle_range = self.cal_battle_range(
        len(self.state_cache) - self.battle_started)

    action_cmds = []
    input_list = []
    model_upgrade = False
    for hero in random_heros:
        hero_info = state_info.get_hero(hero)
        data_input = np.array(TeamBattleInput.gen_input(state_info, hero))

        # Feed the actions already chosen by earlier heroes into the input.
        for prev_action in action_cmds:
            data_input = TeamBattleInput.add_other_hero_action(
                data_input, hero_info, prev_action, debug)

        action_list, _explor_value, _vpreds, clear_cache = \
            self.model_util.get_action_list(self.battle_id, hero, data_input)
        if debug:
            action_str = ' '.join("%.4f" % float(act) for act in action_list)
            print("battle_id", self.battle_id, "tick", state_info.tick,
                  "hero", hero, "model action list", action_str)

        # list_unaval_actions expects the NUMBER of actions, not the Q list.
        unaval_list = TeamBattleTrainer.list_unaval_actions(
            len(action_list), state_info, hero, heros, battle_range)
        if debug:
            unaval_list_str = ' '.join(
                "%.4f" % float(act) for act in unaval_list)
            print("battle_id", self.battle_id, "tick", state_info.tick,
                  "hero", hero, "model remove_unaval_actions",
                  unaval_list_str)

        friends, opponents = TeamBattleUtil.get_friend_opponent_heros(
            heros, hero)
        # No strategy recommendation on this path: pass an empty list so the
        # remaining positional arguments line up with get_action_cmd.
        action_cmd, max_q, selected = TeamBattleTrainer.get_action_cmd(
            action_list, unaval_list, [], state_info, hero, friends,
            opponents)
        if debug:
            print("battle_id", self.battle_id, "tick", state_info.tick,
                  "hero", hero, "model get_action",
                  StateUtil.build_command(action_cmd), "max_q", max_q,
                  "selected", selected)

        # A model upgrade invalidates every cached training sample; the
        # caller is expected to restart the game when this flag is set.
        if clear_cache:
            print('battle_id', self.battle_id, '模型升级,清空训练缓存')
            for hero_name in self.heros:
                self.model_caches[hero_name].clear_cache()
            model_upgrade = True

        action_cmds.append(action_cmd)
        input_list.append(data_input)
    return action_cmds, input_list, model_upgrade


@staticmethod
def list_unaval_actions(act_size, state_info, hero_name, team_battle_heros,
                        battle_range, debug=False):
    """Classify every action index as usable, approach-only or unusable.

    Action layout handled here: 0-7 move in eight directions, 8-12 basic
    attack on one of five targets, 13-27 three skills with five targets
    each. Indices outside that layout stay unusable.

    Skill targets default to enemy heroes; for support skills the target is
    switched to a friendly hero. Skills that can target either side are not
    handled yet.

    Args:
        act_size: number of actions (an int).
        state_info: current frame state.
        hero_name: acting hero.
        team_battle_heros: all heroes taking part in the fight.
        battle_range: current battle-circle range.
        debug: print the reason an action was rejected.

    Returns:
        A list of ``act_size`` ints: ``1`` usable, ``0`` target out of
        reach (``get_action_cmd`` turns this into a move toward the target),
        ``-1`` unusable.
    """
    friends, opponents = TeamBattleUtil.get_friend_opponent_heros(
        team_battle_heros, hero_name)
    hero = state_info.get_hero(hero_name)
    avail_list = [-1] * act_size

    for selected in range(act_size):
        if selected < 8:
            # Move. The move-lock flag is deliberately not checked: attack
            # recovery also sets it to "cannot move".
            fwd = StateUtil.mov(selected)
            move_pos = TeamBattleUtil.play_move(hero, fwd)
            in_range = TeamBattleTrainer.in_battle_range(
                move_pos, battle_range)
            # Any result other than -1 blocks the move.
            # NOTE(review): presumably -1 means "stays inside the circle" -
            # confirm against in_battle_range.
            avail_list[selected] = -1 if in_range != -1 else 1
            continue

        if selected < 13:
            # Basic attack on one of five targets.
            target_hero = TeamBattleUtil.get_target_hero(
                hero_name, friends, opponents, selected - 8)
            if target_hero is None:
                avail_list[selected] = -1
                if debug:
                    print("找不到对应目标英雄")
                continue
            rival_info = state_info.get_hero(target_hero)
            dist = StateUtil.cal_distance(hero.pos, rival_info.pos)
            if not rival_info.is_enemy_visible():
                avail_list[selected] = -1
                if debug:
                    print("英雄不可见")
                continue
            if dist > StateUtil.ATTACK_HERO_RADIUS:
                # Too far for a basic attack: approach instead.
                avail_list[selected] = 0
                if debug:
                    print("英雄太远,放弃普攻")
                continue
            if rival_info.hp <= 0:
                # Ignore targets that are already dead.
                avail_list[selected] = -1
                if debug:
                    print("对方英雄死亡")
                continue
            avail_list[selected] = 1
            continue

        if selected < 28:
            # Skill cast. TODO: channelled casts are not handled yet.
            skillid = int((selected - 13) / 5 + 1)
            skill = hero.skills[skillid]
            if skill.canuse != True:  # noqa: E712 - keep original semantics
                # Silenced, crowd-controlled or skill not learned.
                avail_list[selected] = -1
                if debug:
                    print("技能受限,放弃施法" + str(skillid) +
                          " hero.skills[x].canuse=" + str(skill.canuse) +
                          " tick=" + str(state_info.tick))
                continue
            if skill.cost is not None and skill.cost > hero.mp:
                # Not enough mana. Exception: hero cfg '103' pays skills 1
                # and 2 with health instead of mana.
                if not (hero.cfg_id == '103' and skillid in (1, 2)):
                    avail_list[selected] = -1
                    if debug:
                        print("mp不足,放弃施法" + str(skillid))
                    continue
            if skill.cd > 0:
                avail_list[selected] = -1
                if debug:
                    print("技能cd中,放弃施法" + str(skillid))
                continue

            tgt_index = selected - 13 - (skillid - 1) * 5
            skill_info = SkillUtil.get_skill_info(hero.cfg_id, skillid)
            # TODO: the buff logic has not been tested with a matching hero.
            is_buff = skill_info.cast_target == SkillTargetEnum.buff
            is_self = skill_info.cast_target == SkillTargetEnum.self
            tgt_hero = TeamBattleUtil.get_target_hero(
                hero.hero_name, friends, opponents, tgt_index, is_buff,
                is_self)
            if tgt_hero is None:
                avail_list[selected] = -1
                if debug:
                    print("找不到对应目标英雄")
                continue

            tgtid, _tgtpos = TeamBattleTrainer.choose_skill_target(
                tgt_index, state_info, skill_info, hero_name, hero.pos,
                tgt_hero, debug)
            if tgtid == -1 or tgtid == 0:
                avail_list[selected] = tgtid
                if debug:
                    print("目标不符合施法要求")
                continue

            # Finally apply the rule-based policy filter.
            policy_avail = TeamBattlePolicy.check_skill_condition(
                skill_info, state_info, hero, tgt_hero, friends, opponents)
            # The original compared (``==``) instead of assigning here.
            avail_list[selected] = 1 if policy_avail else -1
    return avail_list


@staticmethod
def choose_skill_target(selected, state_info, skill_info, hero_name, pos,
                        tgt_hero_name, debug=False):
    """Resolve the target id and position for a skill cast.

    Args:
        selected: target slot, 0-4.
        state_info: current frame state.
        skill_info: skill description (``cast_target``, ``cast_distance``).
        hero_name: casting hero.
        pos: position the cast distance is measured from.
        tgt_hero_name: hero occupying the target slot.
        debug: print the reason a target was rejected.

    Returns:
        A ``[tgtid, tgtpos]`` pair (list or tuple, callers unpack it):
        ``(tgt_hero_name, target position)`` when castable,
        ``(0, None)`` when the target is out of cast range,
        ``(-1, None)`` when the target is invalid, invisible or dead.
    """
    hero_info = state_info.get_hero(hero_name)

    # Slot 0 of a self-only skill must point at the caster.
    if (selected == 0 and skill_info.cast_target == SkillTargetEnum.self
            and hero_name != str(tgt_hero_name)):
        if debug:
            print("施法目标为self,但是对象不是自己")
        return [-1, None]

    # Slots above 4 do not exist; the original fell through to an unbound
    # local here, so report them as invalid instead.
    if selected > 4:
        return [-1, None]

    tgt_hero = state_info.get_hero(tgt_hero_name)
    if tgt_hero.team != hero_info.team and not tgt_hero.is_enemy_visible():
        if debug:
            print("敌方英雄不可见")
        return -1, None

    distance = StateUtil.cal_distance(tgt_hero.pos, pos)
    if distance > skill_info.cast_distance:
        if debug:
            print("技能攻击不到对方 %s %s %s" %
                  (tgt_hero_name, distance, skill_info.cast_distance))
        return 0, None

    if tgt_hero.hp <= 0:
        # Ignore targets that are already dead.
        if debug:
            print("技能攻击不了对方,对方已经死亡")
        return -1, None

    return tgt_hero_name, tgt_hero.pos


@staticmethod
def get_max_q(action_list, unaval_list, recommmend_list):
    """Return the highest Q value among the actions that may be executed.

    Args:
        action_list: Q value per action (not modified).
        unaval_list: availability per action; ``-1`` marks unusable.
        recommmend_list: if non-empty, only these action indices compete.

    Returns:
        The best remaining Q value, or the current maximum (``<= -1``) when
        no action qualifies.
    """
    q_list = list(action_list)
    if len(recommmend_list) > 0:
        # Only recommended actions may be selected.
        for idx in range(len(q_list)):
            if idx not in recommmend_list:
                q_list[idx] = -1

    while True:
        max_q = max(q_list)
        if max_q <= -1:
            return max_q
        selected = q_list.index(max_q)
        # TODO: decide whether availability 0 (target out of reach) should
        # also be rejected instead of approaching the target.
        if unaval_list[selected] == -1:
            q_list[selected] = -1
            continue
        return max_q


@staticmethod
def get_action_cmd(action_list, unaval_list, recommmend_list, state_info,
                   hero_name, friends, opponents, revert=False):
    """Turn the best executable Q value into a concrete command.

    ``action_list`` is modified in place: rejected entries are set to -1.

    Args:
        action_list: Q value per action.
        unaval_list: availability per action (``1``, ``0`` or ``-1``).
        recommmend_list: if non-empty, only these action indices compete.
        state_info: current frame state.
        hero_name: acting hero.
        friends: friendly heroes in the fight.
        opponents: enemy heroes in the fight.
        revert: mirror the move direction.

    Returns:
        ``(action, max_q, selected)``. When nothing is executable the action
        is a HOLD command and ``selected`` is ``-1``.
    """
    hero = state_info.get_hero(hero_name)

    if len(recommmend_list) > 0:
        # Only recommended actions may be selected.
        for idx in range(len(action_list)):
            if idx not in recommmend_list:
                action_list[idx] = -1
        print("battle_id", state_info.battleid, "tick", state_info.tick,
              "hero", hero_name, "根据推荐,只从以下行为中挑选",
              ",".join("%f" % float(act) for act in action_list),
              ",".join("%f" % float(act) for act in recommmend_list))

    while True:
        max_q = max(action_list)
        if max_q <= -1:
            action = CmdAction(hero_name, CmdActionEnum.HOLD, None, None,
                               hero.pos, None, None, 48, None)
            return action, max_q, -1

        selected = action_list.index(max_q)
        avail_type = unaval_list[selected]
        # TODO: decide whether availability 0 (target out of reach) should
        # also be rejected instead of approaching the target.
        if avail_type == -1:
            action_list[selected] = -1
            continue

        if selected < 8:
            # Move. The destination comes from our own movement formula; it
            # may be blocked by obstacles, and the real reachable distance
            # may be longer than computed.
            fwd = StateUtil.mov(selected, revert)
            tgtpos = TeamBattleUtil.set_move_target(hero, fwd)
            action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None,
                               None, tgtpos, None, None, selected, None)
            return action, max_q, selected

        if selected < 13:
            # Basic attack on an enemy hero, or approach it when too far.
            target_hero = TeamBattleUtil.get_target_hero(
                hero.hero_name, friends, opponents, selected - 8)
            target_hero_info = state_info.get_hero(target_hero)
            if avail_type == 0:
                action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None,
                                   None, target_hero_info.pos, None, None,
                                   selected, None)
            else:
                action = CmdAction(hero.hero_name, CmdActionEnum.ATTACK, 0,
                                   target_hero, None, None, None, selected,
                                   None)
            return action, max_q, selected

        if selected < 28:
            # Skill cast, or approach the target when out of range.
            skillid = int((selected - 13) / 5 + 1)
            tgt_index = selected - 13 - (skillid - 1) * 5
            skill_info = SkillUtil.get_skill_info(hero.cfg_id, skillid)
            is_buff = skill_info.cast_target == SkillTargetEnum.buff
            is_self = skill_info.cast_target == SkillTargetEnum.self
            tgt_hero = TeamBattleUtil.get_target_hero(
                hero.hero_name, friends, opponents, tgt_index, is_buff,
                is_self)
            tgt_pos = state_info.get_hero(tgt_hero).pos
            fwd = tgt_pos.fwd(hero.pos)
            if avail_type == 0:
                action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None,
                                   None, tgt_pos, None, None, selected,
                                   None)
            else:
                action = CmdAction(hero.hero_name, CmdActionEnum.CAST,
                                   skillid, tgt_hero, tgt_pos, fwd, None,
                                   selected, None)
            return action, max_q, selected

        # Index outside the known layout: the original looped forever on
        # it. Discard it and try the next best action.
        action_list[selected] = -1


def buy_equip(self, state_info, hero_name):
    """Return the purchase command for ``hero_name``, or None if none."""
    buy_action = EquipUtil.buy_equip(state_info, hero_name)
    if buy_action is None:
        return None
    return StateUtil.build_command(buy_action)


def upgrade_skills(self, state_info, hero_name):
    """Return a skill-upgrade command, preferring skill 3, or None."""
    hero = state_info.get_hero(hero_name)
    skills = StateUtil.get_skills_can_upgrade(hero)
    if len(skills) == 0:
        return None
    skillid = 3 if 3 in skills else skills[0]
    update_cmd = CmdAction(hero.hero_name, CmdActionEnum.UPDATE, skillid,
                           None, None, None, None, None, None)
    return StateUtil.build_command(update_cmd)
def build_response(self, raw_state_str):
    """Handle one frame from the game client and build the reply.

    The frame is logged, parsed and merged with the previous cached state.
    Depending on the phase the reply contains setup commands (gold/level,
    skill upgrades, purchases), moves that bring heroes into the battle
    circle, model-chosen battle actions, or a RESTART command.

    Args:
        raw_state_str: JSON text sent by the client.

    Returns:
        A JSON string ``{"ID", "tick", "cmd"}``. For the restart
        notification frame (``tick == -1``) a plain dict
        ``{"ID", "tick"}`` is returned instead.
        NOTE(review): the dict/str mismatch is kept for compatibility -
        confirm what the caller expects.
    """

    def restart_cmds():
        # Command list that asks the client to restart the battle.
        return [StateUtil.build_action_command('27', 'RESTART', None)]

    def restart_response():
        # Complete reply that only asks for a restart.
        return JSON.dumps({
            "ID": raw_state_info.battleid,
            "tick": raw_state_info.tick,
            "cmd": restart_cmds()
        })

    self.save_raw_log(raw_state_str)
    prev_state_info = self.state_cache[-1] if len(
        self.state_cache) > 0 else None
    response_strs = []

    # Parse the client request.
    obj = JSON.loads(raw_state_str)
    raw_state_info = StateInfo.decode(obj)

    # On restart the client sends a frame like
    # {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}
    if raw_state_info.tick == -1:
        return {"ID": raw_state_info.battleid, "tick": -1}

    if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (
            prev_state_info is None
            or prev_state_info.tick > raw_state_info.tick):
        # First frame of a new game: reset all per-battle state.
        print("clear")
        prev_state_info = None
        self.state_cache = []
        self.battle_started = -1
        self.battle_heroes_cache = []
        self.dead_heroes = []
        self.dead_heroes_cache = []
        self.data_inputs = []
        self.rebooting = False
    elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
        # Nothing cached and this is not a start frame: ask for a restart.
        # Occasionally the first frame has no tick (-1); the only option
        # then is to restart this battle as well.
        print("battle_id", self.battle_id, "tick", raw_state_info.tick,
              '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
        return restart_response()

    state_info = StateUtil.update_state_log(prev_state_info, raw_state_info)
    hero = state_info.get_hero("27")
    if hero is None or hero.hp is None:
        # Rare case: hero "27" is missing from the state; restart.
        print("battle_id", self.battle_id, "tick", state_info.tick,
              '找不到英雄27,直接重启游戏', raw_state_info.tick)
        return restart_response()

    # Pre-battle preparation.
    if len(self.state_cache) == 0:
        # First frame: grant gold and levels.
        for hero in self.heros:
            add_gold_cmd = CmdAction(hero, CmdActionEnum.ADDGOLD, None, None,
                                     None, None, None, None, None)
            add_gold_cmd.gold = 3000
            response_strs.append(StateUtil.build_command(add_gold_cmd))
            add_lv_cmd = CmdAction(hero, CmdActionEnum.ADDLV, None, None,
                                   None, None, None, None, None)
            add_lv_cmd.lv = 9
            response_strs.append(StateUtil.build_command(add_lv_cmd))
    elif len(self.state_cache) > 1:
        # Later frames: upgrade skills and buy equipment; this may take
        # several frames.
        for hero in self.heros:
            upgrade_cmd = self.upgrade_skills(state_info, hero)
            if upgrade_cmd is not None:
                response_strs.append(upgrade_cmd)
            buy_cmd = self.buy_equip(state_info, hero)
            if buy_cmd is not None:
                response_strs.append(buy_cmd)

    # Record heroes that died between the previous frame and this one.
    for hero in self.heros:
        if prev_state_info is not None:
            dead = StateUtil.if_hero_dead(prev_state_info, state_info, hero)
            if dead == 1 and hero not in self.dead_heroes:
                print("battle_id", self.battle_id, "tick", state_info.tick,
                      "英雄死亡", hero, "tick", state_info.tick)
                self.dead_heroes.append(hero)

    # All heroes must first stand inside the battle circle; after that every
    # action is decided by the model. Invalid actions and moves that leave
    # the circle are filtered out. Dead heroes are excluded. The circle
    # shrinks over time.
    # TODO: once the fight has started, pull back heroes that a skill moved
    # out of the circle.
    battle_range = self.cal_battle_range(
        len(self.state_cache) - self.battle_started)
    heroes_in_range, heroes_out_range = TeamBattleTrainer.all_in_battle_range(
        state_info, self.heros, self.dead_heroes, battle_range)

    # Living heroes: inside plus outside the circle.
    battle_heros = list(heroes_in_range)
    battle_heros.extend(heroes_out_range)

    # Cache participation and deaths for later training.
    self.battle_heroes_cache.append(battle_heros)
    self.dead_heroes_cache.append(list(self.dead_heroes))

    if len(heroes_out_range) > 0:
        # Some heroes are still outside the circle.
        if self.battle_started > -1:
            print('battle_id', self.battle_id, "战斗已经开始,但是为什么还有英雄在团战圈外",
                  ','.join(heroes_out_range), "battle_range", battle_range)

        # Before the fight: move next to the team's starting point.
        # After the fight has started: move toward the battle centre.
        for hero in heroes_out_range:
            start_point_x = randint(0, 8000)
            start_point_z = TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START * 1000 if self.battle_started == -1 else 0
            start_point_z += randint(-4000, 4000)
            if TeamBattleUtil.get_hero_team(hero) == 0:
                start_point_z *= -1
            start_point_z += TeamBattleTrainer.BATTLE_POINT_Z
            tgt_pos = PosStateInfo(start_point_x, 0, start_point_z)
            move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None,
                                    tgt_pos, None, None, None, None)
            response_strs.append(StateUtil.build_command(move_action))
    elif not self.rebooting:
        # Everybody is inside the circle: the fight is on.
        if self.battle_started == -1:
            self.battle_started = len(self.state_cache)

        # Patch special cases in the frame state, e.g. a hero whose ultimate
        # drops its hp to 1.
        state_info, _ = TeamBattlePolicy.modify_status_4_draculas_invincible(
            state_info, self.state_cache)

        # Ask the model for the actions of each team.
        team_a, team_b = TeamBattleUtil.get_teams(heroes_in_range)
        team_actions_a, input_list_a, model_upgrade_a = self.get_model_actions_team(
            state_info, team_a, heroes_in_range)
        team_actions_b, input_list_b, model_upgrade_b = self.get_model_actions_team(
            state_info, team_b, heroes_in_range)

        # The model was upgraded while the fight is running: restart.
        if (model_upgrade_a or model_upgrade_b
            ) and self.battle_started < len(self.state_cache) + 1:
            print("battle_id", self.battle_id, "因为模型升级,重启战斗",
                  self.battle_started, len(self.state_cache))
            return restart_response()

        data_input_map = {}
        for action_cmd, data_input in zip(team_actions_a + team_actions_b,
                                          input_list_a + input_list_b):
            response_strs.append(StateUtil.build_command(action_cmd))
            state_info.add_action(action_cmd)
            data_input_map[action_cmd.hero_name] = data_input

        # Cache all model inputs for later training.
        self.data_inputs.append(data_input_map)

    # Add this frame to the state cache.
    self.state_cache.append(state_info)

    # Push model actions into the training cache and compute rewards.
    # Rewards depend on later frames, so this runs with a delay.
    last_x_index = 2
    if self.battle_started > -1 and len(self.data_inputs) >= last_x_index:
        if self.rebooting:
            # After RESTART was sent the next frame may still belong to the
            # old battle; drop it for training and ask again.
            print("battle_id", self.battle_id, "tick", state_info.tick,
                  "warn", "要求重启战斗,但是还在收到后续帧状态, 继续重启")
            response_strs = restart_cmds()
        else:
            state_index = len(self.state_cache) - last_x_index
            win, win_team, left_heroes = self.remember_replay_heroes(
                -last_x_index, state_index, battle_range)

            # The team fight is over once remember_replay_heroes reports a
            # win; restart the game.
            if win == 1:
                print('battle_id', self.battle_id, "重启游戏", "剩余人员",
                      ','.join(left_heroes))
                response_strs = restart_cmds()
                self.rebooting = True

    # Send the result back to the game client.
    rsp_obj = {
        "ID": state_info.battleid,
        "tick": state_info.tick,
        "cmd": response_strs
    }
    rsp_str = JSON.dumps(rsp_obj)
    print('battle_id', self.battle_id, 'response', rsp_str)
    return rsp_str
class StateUtil:
    """Static helpers for querying and combining game state frames."""

    # The game does not return a state strictly every 528 ticks; that is the
    # PC case only, and the interval may also get shorter.
    TICK_PER_STATE = 528

    # Radii are compared against StateUtil.cal_distance results, i.e. raw
    # coordinate distance divided by 1000.
    NEARBY_BASEMENT_RADIUS = 7
    ATTACK_HERO_RADIUS = 7  # 13.5
    ATTACK_UNIT_RADIUS = 7  # 10
    TOWER_ATTACK_RADIUS = 8
    # Has to be the same as ATTACK_HERO_RADIUS.
    LINE_MODEL_RADIUS = 7
    GOLD_GAIN_RADIUS = 11
    MAX_RADIUS = 50

    BASEMENT_TEAM_0 = PosStateInfo(-75680, -80, 0)
    BASEMENT_TEAM_1 = PosStateInfo(75140, -80, 0)

    # NOTE(review): keys look like skill config ids - confirm. The original
    # had "10l120" (letter l), fixed to "10120".
    ATTACK_SKILL_RANGES = {
        "10101": 2000,
        "10110": 8000,
        "10120": 6000,
        "10130": 3500,
        "10200": 2000,
        "10210": 8000,
        "10220": 5000,
        "10230": 6000
    }

    # Way points of the three lanes; consecutive points form one segment.
    LINE_WAY_POINTS = [[
        PosStateInfo(56800, 0, -2800),
        PosStateInfo(54000, 0, -5100),
        PosStateInfo(53000, 0, -20000),
        PosStateInfo(37500, 0, -29500),
        PosStateInfo(31800, 0, -38700),
        PosStateInfo(14000, 0, -54500),
        PosStateInfo(-600, 0, -61000),
        PosStateInfo(-22100, 0, -47000),
        PosStateInfo(-33400, 0, -37500),
        PosStateInfo(-41000, 0, -27000),
        PosStateInfo(-51000, 0, -13000),
        PosStateInfo(-56300, 0, 800)
    ], [
        PosStateInfo(58100, 0, -100),
        PosStateInfo(45100, 0, -2000),
        PosStateInfo(28800, 0, 500),
        PosStateInfo(16900, 0, 1000),
        PosStateInfo(0, 0, -1000),
        PosStateInfo(-11500, 0, 300),
        PosStateInfo(-17400, 0, -200),
        PosStateInfo(-44900, 0, 2600),
        PosStateInfo(-56500, 0, 1700)
    ], [
        PosStateInfo(56900, 0, 2600),
        PosStateInfo(54000, 0, 5000),
        PosStateInfo(54200, 0, 18400),
        PosStateInfo(43300, 0, 25700),
        PosStateInfo(36500, 0, 34000),
        PosStateInfo(26000, 0, 45200),
        PosStateInfo(0, 0, 60800),
        PosStateInfo(-20600, 0, 51700),
        PosStateInfo(-39900, 0, 30000),
        PosStateInfo(-52900, 0, 14800),
        PosStateInfo(-56800, 0, 2600)
    ]]

    @staticmethod
    def if_hero_dead(prev_state, cur_state, hero_name):
        """Return 1 if the hero had hp > 0 before and has hp <= 0 now, else 0."""
        prev_hero = prev_state.get_hero(hero_name)
        cur_hero = cur_state.get_hero(hero_name)
        return 1 if prev_hero.hp > 0 and cur_hero.hp <= 0 else 0

    @staticmethod
    def get_attack_cast_dmg(cur_state, next_state, next_next_state,
                            hero_name, rival_hero):
        """Return the damage the hero's current action dealt to ``rival_hero``.

        Only CAST and ATTACK actions count; anything else yields 0. Damage is
        read from the following frame(s) because it lands with a delay.
        """
        cur_act = cur_state.get_hero_action(hero_name)
        skill_slot = cur_act.skillid

        def dmg_in(frame):
            return frame.get_hero_dmg_skill(hero_name, skill_slot, rival_hero)

        if cur_act.action == CmdActionEnum.CAST:
            # Skills: damage seen in the next two frames.
            return dmg_in(next_state) + dmg_in(next_next_state)

        if cur_act.action == CmdActionEnum.ATTACK:
            next_act = next_state.get_hero_action(hero_name)
            if next_act is None or next_act.action != CmdActionEnum.ATTACK:
                # The next action is not an attack, so both following frames
                # can be attributed to this one.
                return dmg_in(next_state) + dmg_in(next_next_state)
            # Otherwise only count the next frame, to keep it simple.
            return dmg_in(next_state)

        return 0

    @staticmethod
    def if_first_tower_destroyed_in_middle_line(state_info):
        """Return the team of a destroyed unit at x == +/-17110, else None.

        The check is hard-wired to the x coordinate of the first tower in the
        middle lane.
        """
        for unit in state_info.units:
            if unit.pos.x == 17110 or unit.pos.x == -17110:
                if unit.hp <= 0:
                    print(unit.unit_name + '塔被摧毁, win:' + str(unit.team) +
                          " detail:" + unit.pos.to_string())
                    return unit.team
        return None

    @staticmethod
    def get_tower_hp_change(state_info, next_info, hero_name, line_idx,
                            self_tower=True):
        """Sum the hp fraction lost by towers near the hero between two frames.

        Args:
            state_info: current frame.
            next_info: following frame.
            hero_name: hero whose surroundings are inspected.
            line_idx: lane index.
            self_tower: True for the hero's own towers, False for the others.

        Returns:
            ``(hp_change, destroyed)``; a tower that is missing or has
            hp <= 0 in ``next_info`` contributes its whole remaining hp
            fraction and sets ``destroyed``.
        """
        hero_state = state_info.get_hero(hero_name)
        near_towers = StateUtil.get_near_towers_in_line(
            state_info, hero_state, line_idx, StateUtil.LINE_MODEL_RADIUS)
        if self_tower:
            near_towers = [t for t in near_towers if t.team == hero_state.team]
        else:
            near_towers = [t for t in near_towers if t.team != hero_state.team]

        hp_change = 0
        destroyed = False
        for tower in near_towers:
            next_state_tower = next_info.get_unit(tower.unit_name)
            if next_state_tower is None or next_state_tower.hp <= 0:
                hp_change += float(tower.hp) / tower.maxhp
                destroyed = True
            else:
                hp_change += float(tower.hp - next_state_tower.hp) / tower.maxhp
        return hp_change, destroyed

    @staticmethod
    def get_skills_can_upgrade(hero_info):
        """Return the skill ids (1-3) whose ``up`` flag is set."""
        return [i for i in range(1, 4) if hero_info.skills[i].up]

    @staticmethod
    def get_basement(hero_info):
        """Return the base position of the hero's team."""
        return StateUtil.BASEMENT_TEAM_1 if hero_info.team == 1 else StateUtil.BASEMENT_TEAM_0

    @staticmethod
    def if_hero_at_basement(hero_info):
        """Return True if the hero is within NEARBY_BASEMENT_RADIUS of its base."""
        basement = StateUtil.get_basement(hero_info)
        distance = StateUtil.cal_distance(hero_info.pos, basement)
        return distance < StateUtil.NEARBY_BASEMENT_RADIUS

    @staticmethod
    def if_unit_monster(unit_info):
        """Return True if the unit's cfg_id is a known monster id."""
        # TODO: the ids of the two bosses are still missing.
        return int(unit_info.cfg_id) in (612, 6410, 611)

    @staticmethod
    def if_unit_tower(unit_name):
        """Return True for unit names 1..26."""
        return 0 < int(unit_name) <= 26

    @staticmethod
    def if_unit_hero(unit_name):
        """Return True for unit names 27..28."""
        return 27 <= int(unit_name) <= 28

    @staticmethod
    def if_unit_soldier(unit_cfgid):
        """Return True for cfg ids 911..914."""
        return int(unit_cfgid) in (911, 912, 913, 914)

    @staticmethod
    def get_heros_in_team(state_info, team_id):
        """Return all heroes of ``team_id``."""
        return [hero for hero in state_info.heros if hero.team == team_id]

    @staticmethod
    def get_units_in_team(state_info, team_id):
        """Return the living units of ``team_id`` whose state is 'in'."""
        return [
            unit for unit in state_info.units
            if unit.team == team_id and unit.state == 'in' and unit.hp > 0
        ]

    @staticmethod
    def get_dead_units_in_line(state_info, team_id, line_index,
                               hero_info=None, search_range=MAX_RADIUS):
        """Return the dead units (hp <= 0) of a team located on a lane.

        When ``hero_info`` is given only units within ``search_range`` of the
        hero are returned.
        """
        result = []
        for unit in state_info.units:
            # A dying soldier's last record has maxhp=0, hp=1, state=out; the
            # one before has normal maxhp, hp=0, state=in. hp <= 0 is used as
            # the death marker.
            if unit.hp > 0 or unit.team != team_id:
                continue
            # Segment indices start at 0, so 0 is a valid "on the lane"
            # answer (the original tested ``> 0`` and dropped segment 0).
            if StateUtil.if_in_line(unit, line_index) < 0:
                continue
            if hero_info is None or StateUtil.cal_distance(
                    unit.pos, hero_info.pos) <= search_range:
                result.append(unit)
        return result

    @staticmethod
    def if_unit_long_range_attack(unit_cfgid):
        """Return False for cfg id 911, True for everything else."""
        return int(unit_cfgid) != 911

    @staticmethod
    def get_unit_value(unit_name, unit_cfgid):
        """Return the value of a unit, or -1 (with a log line) if unknown."""
        # TODO: verify these numbers.
        values_by_cfg = {911: 25, 912: 20, 913: 50, 914: 50}
        cfg = int(unit_cfgid)
        if cfg in values_by_cfg:
            return values_by_cfg[cfg]
        if int(unit_name) < 27:
            return 200
        print("unknow value unit %s cfg %s" % (unit_name, unit_cfgid))
        return -1

    @staticmethod
    def get_frontest_soldier_in_line(state_info, line_index, team_id):
        """Return the most advanced non-tower, non-monster unit on a lane.

        Team 0 keeps the unit with the larger x, team 1 the one with the
        smaller x; on ties the later unit wins. Returns None if the lane is
        empty.
        """
        units = StateUtil.get_units_in_team(state_info, team_id)
        soldiers = [
            u for u in units if not StateUtil.if_unit_monster(u)
            and not StateUtil.if_unit_tower(u.unit_name)
        ]
        frontest = None
        for soldier in soldiers:
            if StateUtil.if_in_line(soldier, line_index) < 0:
                continue
            keep_current = frontest is not None and (
                (frontest.pos.x > soldier.pos.x and team_id == 0) or
                (frontest.pos.x < soldier.pos.x and team_id == 1))
            if not keep_current:
                frontest = soldier
        return frontest

    @staticmethod
    def get_units_in_line(units, line_index):
        """Return the non-None units that lie on the given lane."""
        return [
            unit for unit in units if unit is not None
            and StateUtil.if_in_line(unit, line_index) >= 0
        ]

    @staticmethod
    def get_solider_lines(state_info, line_index, team_id):
        """Group a team's soldiers on one lane into waves.

        Soldiers are bucketed by lane segment. Every run of consecutive
        occupied segments becomes one ``SoldierLine`` whose position is the
        mean position of the soldiers in that run.

        Args:
            state_info: current frame.
            line_index: lane index.
            team_id: team whose soldiers are grouped.

        Returns:
            List of ``SoldierLine`` in way-point order, reversed for team 1.
        """
        units = StateUtil.get_units_in_team(state_info, team_id)
        soldiers = [
            u for u in units if not StateUtil.if_unit_monster(u)
            and not StateUtil.if_unit_tower(u.unit_name)
        ]

        # segment index -> names of the soldiers standing in that segment
        line_pos_map = {}
        soldiers_in_line = 0
        for soldier in soldiers:
            line_pos = StateUtil.if_in_line(soldier, line_index)
            if line_pos >= 0:
                soldiers_in_line += 1
                line_pos_map.setdefault(line_pos, []).append(soldier.unit_name)
        print('front_point team:%s, line:%s, %s/%s in line' %
              (team_id, line_index, soldiers_in_line, len(soldiers)))

        soldier_lines = []
        cache_units = []

        def flush():
            # Store the pending run as one wave. The original indexed the
            # map with the EMPTY segment and always raised KeyError.
            pos = StateUtil.cal_soldier_wave_point(state_info, cache_units)
            soldier_lines.append(
                SoldierLine(team_id, line_index, pos, list(cache_units)))
            del cache_units[:]

        for line_pos_idx in range(len(StateUtil.LINE_WAY_POINTS[line_index])):
            if line_pos_idx in line_pos_map:
                cache_units.extend(line_pos_map[line_pos_idx])
            elif cache_units:
                # An empty segment ends the current run.
                flush()
        if cache_units:
            flush()

        # NOTE(review): the original comment says the order of team 0 has to
        # be flipped, while the code flips team 1; the code is kept.
        if team_id == 1 and len(soldier_lines) > 0:
            soldier_lines.reverse()
        return soldier_lines

    @staticmethod
    def cal_soldier_wave_point(state_info, unit_index_list):
        """Return the mean x/z position of the named units, with y = -80.

        Raises ZeroDivisionError for an empty ``unit_index_list``.
        """
        cached_x = 0
        cached_z = 0
        for unit_name in unit_index_list:
            unit = state_info.get_unit(unit_name)
            cached_x += unit.pos.x
            cached_z += unit.pos.z
        count = len(unit_index_list)
        return PosStateInfo(
            int(cached_x / count), int(-80), int(cached_z / count))

    @staticmethod
    def if_in_line(unit_info, line_index, range=3000):
        """Return the 0-based lane segment containing the unit, or -1.

        A segment is the x-interval between two consecutive way points, with
        the z-interval widened by ``range`` on both sides.
        """
        line = StateUtil.LINE_WAY_POINTS[line_index]
        for idx, (point, next_point) in enumerate(zip(line, line[1:])):
            min_x = min(next_point.x, point.x)
            max_x = max(next_point.x, point.x)
            min_z = min(next_point.z, point.z) - range
            max_z = max(next_point.z, point.z) + range
            if min_x <= unit_info.pos.x <= max_x and min_z <= unit_info.pos.z <= max_z:
                return idx
        return -1

    @staticmethod
    def parse_state_log(json_str):
        """Decode one log line into a StateInfo.

        The first 23 characters are dropped before parsing.
        NOTE(review): presumably a timestamp prefix in front of the JSON
        object - confirm against the log writer.
        """
        state_json = JSON.loads(json_str[23:])
        return StateInfo.decode(state_json)

    @staticmethod
    def update_state_log(prev_state, cur_state):
        """Merge ``cur_state`` into ``prev_state`` and return the result.

        Frames are not transmitted in full, so each one is merged with the
        complete previous frame. Without a previous frame ``cur_state`` is
        returned unchanged.
        """
        if prev_state is None:
            return cur_state
        return prev_state.merge(cur_state)

    @staticmethod
    def get_nearby_enemy_heros(state_info, hero_id,
                               max_distance=ATTACK_HERO_RADIUS):
        """Return visible, living enemy heroes closer than ``max_distance``.

        The result is sorted by hero name (as int), descending.
        """
        hero = state_info.get_hero(hero_id)
        enemy_heros = StateUtil.get_heros_in_team(state_info, 1 - hero.team)
        nearby_enemies = []
        for enemy in enemy_heros:
            # The enemy hero has to be visible first.
            if enemy.is_enemy_visible() and enemy.hp > 0:
                if StateUtil.cal_distance(hero.pos, enemy.pos) < max_distance:
                    nearby_enemies.append(enemy)
        nearby_enemies.sort(key=lambda h: int(h.hero_name), reverse=True)
        return nearby_enemies

    @staticmethod
    def get_nearby_friend_units(state_info, hero_id,
                                max_distance=ATTACK_HERO_RADIUS):
        """Return friendly non-tower, non-monster units within range.

        The result is sorted by unit name (as int), descending.
        """
        hero = state_info.get_hero(hero_id)
        friend_units = StateUtil.get_units_in_team(state_info, hero.team)
        nearby_friend_units = []
        for unit in friend_units:
            # Skip towers (names <= 26) and monsters.
            if int(unit.unit_name) > 26 and not StateUtil.if_unit_monster(
                    unit) and unit.hp > 0:
                if StateUtil.cal_distance(hero.pos, unit.pos) < max_distance:
                    nearby_friend_units.append(unit)
        nearby_friend_units.sort(key=lambda u: int(u.unit_name), reverse=True)
        return nearby_friend_units

    @staticmethod
    def get_nearby_enemy_units(state_info, hero_id,
                               max_distance=ATTACK_HERO_RADIUS):
        """Return enemy non-tower, non-monster units within range.

        The result is sorted by unit name (as int), descending.
        """
        hero = state_info.get_hero(hero_id)
        enemy_units = StateUtil.get_units_in_team(state_info, 1 - hero.team)
        nearby_enemy_units = []
        for unit in enemy_units:
            # Skip towers (names <= 26) and monsters.
            if int(unit.unit_name) > 26 and not StateUtil.if_unit_monster(
                    unit) and unit.hp > 0 and unit.state == 'in':
                if StateUtil.cal_distance(hero.pos, unit.pos) < max_distance:
                    nearby_enemy_units.append(unit)
        nearby_enemy_units.sort(key=lambda u: int(u.unit_name), reverse=True)
        return nearby_enemy_units

    @staticmethod
    def get_nearest_enemy_tower(state_info, hero_id,
                                max_distance=ATTACK_HERO_RADIUS):
        """Return the closest living enemy tower within range, or None."""
        hero = state_info.get_hero(hero_id)
        enemy_units = StateUtil.get_units_in_team(state_info, 1 - hero.team)
        nearest_enemy_tower = None
        for unit in enemy_units:
            # Skip soldiers (names >= 27) and monsters.
            if int(unit.unit_name) < 27 and not StateUtil.if_unit_monster(
                    unit) and unit.hp > 0:
                distance = StateUtil.cal_distance(hero.pos, unit.pos)
                if distance < max_distance:
                    nearest_enemy_tower = unit
                    # Shrink the limit so only closer towers replace it.
                    max_distance = distance
        return nearest_enemy_tower

    @staticmethod
    def get_first_tower(state_info, hero):
        """Return the hero's own unit at x == +/-17110, or None."""
        for unit in state_info.units:
            if unit.team == hero.team and (unit.pos.x == 17110
                                           or unit.pos.x == -17110):
                return unit
        return None

    @staticmethod
    def get_hp_restore_place(state_info, hero):
        """Return a position 3000 behind the hero's own unit at x == +/-17110.

        Returns None if no such unit exists.
        """
        for unit in state_info.units:
            if unit.team == hero.team and (unit.pos.x == 17110
                                           or unit.pos.x == -17110):
                # Move to the rear side of the tower.
                near_tower_x = unit.pos.x - 3000 if hero.team == 0 else unit.pos.x + 3000
                return PosStateInfo(near_tower_x, unit.pos.y, unit.pos.z)
        return None

    @staticmethod
    def get_tower_behind(tower_info, hero, line_index):
        """Return the position 4000 behind the given tower.

        According to the original comment this is the spot for reaching the
        hp-restore point, as opposed to get_retreat_pos.
        """
        near_tower_x = tower_info.pos.x - 4000 if hero.team == 0 else tower_info.pos.x + 4000
        return PosStateInfo(near_tower_x, tower_info.pos.y, tower_info.pos.z)

    @staticmethod
    def get_retreat_pos(state_info, hero, line_index):
        """Return a retreat position behind the nearest own tower on the lane.

        Only towers behind the hero (smaller x for team 0, larger x for
        team 1) are considered; without one the team base is returned.
        The original comment warns not to pick up the healing rune here.
        """
        towers = []
        for unit in state_info.units:
            if not (StateUtil.if_unit_tower(unit.unit_name)
                    and unit.team == hero.team):
                continue
            if StateUtil.if_in_line(unit, line_index) < 0:
                continue
            # Keep towers that are behind the hero.
            if hero.team == 0 and hero.pos.x > unit.pos.x:
                towers.append(unit)
            elif hero.team == 1 and hero.pos.x < unit.pos.x:
                towers.append(unit)

        if len(towers) == 0:
            return StateUtil.BASEMENT_TEAM_1 if hero.team == 1 else StateUtil.BASEMENT_TEAM_0

        # Closest tower along x; the first one wins on ties.
        near_tower = min(towers, key=lambda t: abs(hero.pos.x - t.pos.x))
        # Move to the rear side of the tower.
        near_tower_x = near_tower.pos.x - 3000 if hero.team == 0 else near_tower.pos.x + 3000
        near_tower_z = near_tower.pos.z - 2000 if hero.team == 0 else near_tower.pos.z + 2000
        return PosStateInfo(near_tower_x, near_tower.pos.y, near_tower_z)

    @staticmethod
    def get_near_towers_in_line(state_info, hero_state, line_idx, distance):
        """Return units named <= 26 on the lane closer than ``distance``."""
        towers = []
        for unit in state_info.units:
            if int(unit.unit_name) <= 26:
                if StateUtil.if_in_line(unit, line_idx) >= 0:
                    if StateUtil.cal_distance(unit.pos,
                                              hero_state.pos) < distance:
                        towers.append(unit)
        return towers
@staticmethod
def cal_distance2(pos1, pos2):
    """Return the planar (x/z) distance between two positions, in raw units.

    The y coordinate is ignored.
    """
    dx = pos1.x - pos2.x
    dz = pos1.z - pos2.z
    return math.sqrt(dx * dx + dz * dz)


@staticmethod
def cal_distance(pos1, pos2):
    """Return the planar (x/z) distance between two positions, divided by 1000.

    The y coordinate is ignored.
    """
    dx = pos1.x - pos2.x
    dz = pos1.z - pos2.z
    return math.sqrt(dx * dx + dz * dz) / 1000


@staticmethod
def if_retreat(prev_pos, cur_pos, hero):
    """Tell whether the hero moved backwards between two positions.

    Team 0 retreats when x decreases, team 1 when x increases.
    """
    if hero.team == 0 and cur_pos.x < prev_pos.x:
        return True
    if hero.team == 1 and cur_pos.x > prev_pos.x:
        return True
    return False


@staticmethod
def mov(direction, revert=False):
    """Map a direction index (0-7) to one of eight unit-length headings.

    Args:
        direction: index of the heading; any value other than 0-6 selects
            the eighth heading.
        revert: when true, the x and z components are negated.

    Returns:
        A new FwdStateInfo with y == 0.
    """
    if direction == 0:
        fwd = FwdStateInfo(1000, 0, 0)
    elif direction == 1:
        fwd = FwdStateInfo(707, 0, 707)
    elif direction == 2:
        fwd = FwdStateInfo(0, 0, 1000)
    elif direction == 3:
        fwd = FwdStateInfo(-707, 0, 707)
    elif direction == 4:
        fwd = FwdStateInfo(0, 0, -1000)
    elif direction == 5:
        fwd = FwdStateInfo(-707, 0, -707)
    elif direction == 6:
        fwd = FwdStateInfo(-1000, 0, 0)
    else:
        # The eighth heading used to repeat direction 3 (-707, 0, 707),
        # which left (707, 0, -707) unreachable.
        fwd = FwdStateInfo(707, 0, -707)
    if revert:
        fwd.x *= -1
        fwd.z *= -1
    return fwd


@staticmethod
def build_command(action):
    """Convert a command action object into the dict sent to the game.

    Args:
        action: object exposing ``action`` (a CmdActionEnum member),
            ``hero_name`` and the per-command fields read below.

    Returns:
        A dict with "hero_id", "action" and the command-specific fields.

    Raises:
        ValueError: if no branch matches the action (including a known
            action type whose required field is None).
    """
    kind = action.action
    if kind == CmdActionEnum.MOVE and action.tgtpos is not None:
        return {
            "hero_id": action.hero_name,
            "action": 'MOVE',
            "pos": action.tgtpos.to_string()
        }
    if kind == CmdActionEnum.MOVE and action.fwd is not None:
        return {
            "hero_id": action.hero_name,
            "action": 'MOVE',
            "fwd": action.fwd.to_string()
        }
    if kind == CmdActionEnum.ATTACK and action.tgtid is not None:
        return {
            "hero_id": action.hero_name,
            "action": 'ATTACK',
            "tgtid": str(action.tgtid)
        }
    if kind == CmdActionEnum.CAST and action.skillid is not None:
        command = {
            "hero_id": action.hero_name,
            "action": 'CAST',
            "skillid": str(action.skillid)
        }
        if action.tgtid is not None:
            command['tgtid'] = str(action.tgtid)
        if action.tgtpos is not None:
            command['tgtpos'] = action.tgtpos.to_string()
        if action.fwd:
            command['fwd'] = action.fwd.to_string()
        return command
    if kind == CmdActionEnum.UPDATE and action.skillid is not None:
        return {
            "hero_id": action.hero_name,
            "action": 'UPDATE',
            "skillid": str(action.skillid)
        }
    if kind == CmdActionEnum.BUY and action.itemid is not None:
        return {
            "hero_id": action.hero_name,
            "action": 'BUY',
            "itemid": str(action.itemid)
        }
    if kind == CmdActionEnum.AUTO:
        return {"hero_id": action.hero_name, "action": 'AUTO'}
    if kind == CmdActionEnum.HOLD:
        # HOLD is sent as a MOVE to the given position: if the previous
        # action was an attack, a plain hold would just keep attacking.
        return {
            "hero_id": action.hero_name,
            "action": 'MOVE',
            "pos": action.tgtpos.to_string()
        }
    if kind == CmdActionEnum.RETREAT:
        return {
            "hero_id": action.hero_name,
            "action": 'MOVE',
            "pos": action.tgtpos.to_string()
        }
    if kind == CmdActionEnum.RESTART:
        return {"hero_id": action.hero_name, "action": 'RESTART'}
    if kind == CmdActionEnum.ADDGOLD:
        return {
            "hero_id": action.hero_name,
            "action": 'ADDGOLD',
            "gold": str(action.gold)
        }
    if kind == CmdActionEnum.ADDLV:
        return {
            "hero_id": action.hero_name,
            "action": 'ADDLV',
            "lv": str(action.lv)
        }
    raise ValueError('unexpected action type ' + str(kind))


@staticmethod
def build_action_command(hero_id, action, parameters):
    """Build a command dict from an action name and a parameter dict.

    Unlike build_command, 'HOLD' is emitted as-is here.

    Args:
        hero_id: value stored under "hero_id".
        action: action name string ('MOVE', 'ATTACK', 'CAST', ...).
        parameters: dict holding the fields the action requires.

    Raises:
        ValueError: if the action is unknown or its required parameter
            is missing.
    """
    # TODO: the action object now already carries everything needed, so
    # this function should be rewritten to take just the action.
    if action == 'MOVE' and 'pos' in parameters:
        return {
            "hero_id": hero_id,
            "action": action,
            "pos": parameters['pos']
        }
    if action == 'ATTACK' and 'tgtid' in parameters:
        return {
            "hero_id": hero_id,
            "action": action,
            "tgtid": parameters['tgtid']
        }
    if action == 'CAST' and 'skillid' in parameters:
        command = {
            "hero_id": hero_id,
            "action": action,
            "skillid": parameters['skillid']
        }
        # Optional targeting fields are copied only when supplied.
        for key in ('tgtid', 'tgtpos', 'fwd'):
            if key in parameters:
                command[key] = parameters[key]
        return command
    if action == 'UPDATE' and 'skillid' in parameters:
        return {
            "hero_id": hero_id,
            "action": action,
            "skillid": parameters['skillid']
        }
    if action in ('AUTO', 'HOLD', 'RESTART'):
        return {"hero_id": hero_id, "action": action}
    raise ValueError('unexpected action type ' + action)


@staticmethod
def build_action_response(state_info):
    """Build the JSON response holding a scripted command for every hero.

    For each hero: optionally upgrade the first upgradable skill, then
    either act on a randomly picked nearby enemy (cast the first usable
    skill, otherwise attack), move to a fixed point early in the game,
    or hold.

    Returns:
        JSON string with keys "ID", "tick" and "cmd".
    """
    battle_id = state_info.battleid
    tick = state_info.tick
    action_strs = []
    for hero in state_info.heros:
        # Test code: if any skill can be upgraded, upgrade the first one.
        skills = StateUtil.get_skills_can_upgrade(hero)
        if len(skills) > 0:
            update_str = StateUtil.build_action_command(
                hero.hero_name, 'UPDATE', {'skillid': str(skills[0])})
            action_strs.append(update_str)

        # Collect nearby enemy heroes and enemy units.
        action_str = None
        nearby_enemy_heros = StateUtil.get_nearby_enemy_heros(
            state_info, hero.hero_name)
        nearby_enemy_units = StateUtil.get_nearby_enemy_units(
            state_info, hero.hero_name)
        hero_count = len(nearby_enemy_heros)
        total_len = hero_count + len(nearby_enemy_units)

        if total_len > 0:
            # Indices below hero_count pick a hero, the rest pick a unit.
            ran_pick = randint(0, total_len - 1)
            if ran_pick < hero_count:
                tgtid = nearby_enemy_heros[ran_pick].hero_name
                tgtpos = nearby_enemy_heros[ran_pick].pos
            else:
                tgtid = nearby_enemy_units[ran_pick - hero_count].unit_name
                tgtpos = nearby_enemy_units[ran_pick - hero_count].pos
            fwd = tgtpos.fwd(hero.pos)

            # Prefer casting a skill. Strictly speaking, each skill kind
            # needs a direction, a target, a target position, or nothing;
            # here all three are always sent.
            for skillid in range(1, 4):
                # canuse covers being silenced, a skill level of 0, and
                # the skill being on cooldown.
                if hero.skills[skillid].canuse:
                    action_str = StateUtil.build_action_command(
                        hero.hero_name, 'CAST', {
                            'skillid': str(skillid),
                            'tgtid': tgtid,
                            'tgtpos': tgtpos.to_string(),
                            'fwd': fwd.to_string()
                        })
                    break
            if action_str is None:
                action_str = StateUtil.build_action_command(
                    hero.hero_name, 'ATTACK', {'tgtid': tgtid})
        # During the first minute, order the hero to a fixed position.
        # NOTE(review): the meaning of the 528 lower bound is not visible
        # here - confirm.
        elif StateUtil.TICK_PER_STATE * 2 * 40 > int(tick) > 528:
            if hero.team == 0:
                action_str = StateUtil.build_action_command(
                    hero.hero_name, 'MOVE', {'pos': '( -5000, -80, 0)'})
            else:
                action_str = StateUtil.build_action_command(
                    hero.hero_name, 'MOVE', {'pos': '( 5000, -80, 0)'})
        else:
            action_str = StateUtil.build_action_command(
                hero.hero_name, 'HOLD', {})
        action_strs.append(action_str)

    rsp_obj = {"ID": battle_id, "tick": tick, "cmd": action_strs}
    return JSON.dumps(rsp_obj)


@staticmethod
def get_hit_rival_tower_dmg_ratio(cur_state, next_state, next_next_state,
                                  hero_name):
    """Return the tower damage attributed to the hero's current attack.

    Despite the name, the value returned is the damage read from the
    following state(s), not a ratio. It is 0 when the hero's current
    action is not ATTACK.
    """
    dmg = 0
    cur_act = cur_state.get_hero_action(hero_name)
    if cur_act.action == CmdActionEnum.ATTACK:
        # Basic-attack damage lands with a delay, so first look at the
        # hero's action in the next frame. If that is not another attack,
        # read the attack damage across both following frames.
        next_act = next_state.get_hero_action(hero_name)
        if next_act is None or next_act.action != CmdActionEnum.ATTACK:
            dmg = next_state.get_hero_tower_dmg(hero_name)
            dmg += next_next_state.get_hero_tower_dmg(hero_name)
        else:
            # Otherwise count only the next frame, for simplicity.
            dmg = next_state.get_hero_tower_dmg(hero_name)
    return dmg
def play_move(pos, fwd, time_second=0.5):
    """Project a position along a heading for ``time_second`` seconds.

    Reachability (obstacles and the like) is not taken into account.

    Returns:
        A new PosStateInfo at ``pos + time_second * fwd``.
    """
    new_x = pos.x + time_second * fwd.x
    new_y = pos.y + time_second * fwd.y
    new_z = pos.z + time_second * fwd.z
    return PosStateInfo(new_x, new_y, new_z)
def move_towards(start_pos, dest_pos, move_dis, distance):
    """Return the point reached by moving ``move_dis`` from start towards dest.

    The move happens in the x/z plane; the result keeps ``start_pos.y``.

    Args:
        start_pos: starting position (reads x, y, z).
        dest_pos: position to move towards (reads x, z).
        move_dis: length to travel along the start->dest segment.
        distance: length used to normalise the segment - presumably the
            start->dest distance in the same units as ``move_dis``;
            confirm against callers.

    Returns:
        A new PosStateInfo.
    """
    if distance == 0:
        # No direction to move in; staying put avoids a ZeroDivisionError.
        return PosStateInfo(start_pos.x, start_pos.y, start_pos.z)
    final_pos_x = start_pos.x + (dest_pos.x - start_pos.x) * move_dis / distance
    # The z delta must come from dest_pos.z; it used to read dest_pos.y.
    final_pos_z = start_pos.z + (dest_pos.z - start_pos.z) * move_dis / distance
    return PosStateInfo(final_pos_x, start_pos.y, final_pos_z)
def decode(obj):
    """Build an AttackStateInfo from its decoded-dict form.

    ``atker`` and ``skill`` are required keys; ``defer`` and ``tgtpos``
    are optional and become None when absent.
    """
    attacker = obj['atker']

    defender = None
    if 'defer' in obj:
        defender = obj['defer']

    target_pos = None
    if 'tgtpos' in obj:
        target_pos = PosStateInfo.decode(obj['tgtpos'])

    return AttackStateInfo(attacker, defender, target_pos, obj['skill'])