def if_restart(self, state_infos, state_index):
    # Restart condition: a hero has died twice, or the first middle-lane tower has fallen
    state_info = state_infos[state_index]
    next_state = state_infos[state_index + 1]
    new = 0
    loss_team = -1
    for hero_name in [self.model1_hero, self.model2_hero]:
        hero_info = state_info.get_hero(hero_name)
        if hero_name == self.model1_hero:
            if_hero_dead = StateUtil.if_hero_dead(state_info, next_state, hero_name)
            self.model1_total_death += if_hero_dead
            total_death = self.model1_total_death
            if if_hero_dead == 1:
                self.model1_just_dead = 1
        else:
            if_hero_dead = StateUtil.if_hero_dead(state_info, next_state, hero_name)
            self.model2_total_death += if_hero_dead
            total_death = self.model2_total_death
            if if_hero_dead == 1:
                self.model2_just_dead = 1
        tower_destroyed_cur = StateUtil.if_first_tower_destroyed_in_middle_line(state_info)
        tower_destroyed_next = StateUtil.if_first_tower_destroyed_in_middle_line(next_state)
        if total_death >= 2 or (tower_destroyed_cur is None and tower_destroyed_next is not None):
            # This is the only place where the current game is ended and restarted
            print('battle_id', state_info.tick, 'restarting the game')
            new = 1
            loss_team = hero_info.team if total_death >= 2 else tower_destroyed_next
            self.model1_total_death = 0
            self.model2_total_death = 0
            return new, loss_team
    return new, loss_team
def train_line_model(state_path, model_path, scope, output_model_path, heros):
    state_file = open(state_path, "r")
    model = LineModel_DQN(279, 48, heros, scope=scope)
    if model_path is not None:
        model.load(model_path)
    lines = state_file.readlines()
    for idx in range(len(lines)):
        state_info = StateUtil.parse_state_log(lines[idx])
        if len(state_info.actions) > 0:
            # Skip the last few frames whose actions carry no reward
            flag = 0
            for action in state_info.actions:
                if action.reward is None:
                    flag += 1
            if flag == 0:
                prev_state_info = StateUtil.parse_state_log(lines[idx - 1])
                next_state_info = StateUtil.parse_state_log(lines[idx + 1])
                model.get_action(prev_state_info, state_info, '27', '28')
                added = model.remember(prev_state_info, state_info, next_state_info)
                if added:
                    # act_times has to be increased manually here
                    model.act_times += 1
                model1_memory_len = model.get_memory_size()
                if model.if_replay(64):
                    # print('start model training')
                    model.replay(64)
                if model1_memory_len > 0 and model1_memory_len % 1000 == 0:
                    save_dir = output_model_path + str(model.get_memory_size())
                    os.makedirs(save_dir)
                    model.save(save_dir + '/model')
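# Hypothetical usage sketch for train_line_model. The paths, the scope name and the
# hero ids '27'/'28' below are placeholders chosen for illustration, not values taken
# from the project; only the function signature comes from the code above.
if __name__ == '__main__':
    train_line_model(
        state_path='logs/state.log',          # state log with rewards already attached
        model_path=None,                      # or a directory containing a saved checkpoint
        scope='line_model_dqn',
        output_model_path='models/line_dqn_',
        heros=['27', '28'])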
def cal_target_ppo_2(prev_state, cur_state, next_state, hero_name, rival_hero_name, line_idx):
    LineModel_PPO1.assert_tower_in_input(cur_state, hero_name, rival_hero_name)

    # Only the gain/loss of the current frame is computed:
    # gold earned plus the change of the rival hero's hp
    # Get the creeps that died and derive their gold value from their attributes
    cur_rival_hero = cur_state.get_hero(rival_hero_name)
    rival_team = cur_rival_hero.team
    cur_hero = cur_state.get_hero(hero_name)
    next_hero = next_state.get_hero(hero_name)
    next_rival_hero = next_state.get_hero(rival_hero_name)

    # Find enemy creeps that died near the hero
    dead_units = StateUtil.get_dead_units_in_line(
        next_state, rival_team, line_idx, cur_hero, StateUtil.GOLD_GAIN_RADIUS)
    dead_golds = sum([StateUtil.get_unit_value(u.unit_name, u.cfg_id) for u in dead_units])

    # Ignore the small periodic gold trickle (+3) the hero receives
    gold_delta = next_hero.gold - cur_hero.gold
    if gold_delta % 10 == 3 or gold_delta % 10 == 8 or gold_delta == int(dead_golds / 2) + 3:
        gold_delta -= 3

    # It is hard to tell whether the hero dealt the last hit, so gold gained beyond half of
    # the dead units' value is attributed to the hero
    if gold_delta > 0:
        gold_delta = gold_delta * 2 - dead_golds
        if gold_delta < 0:
            print('kill gold should not be negative', cur_state.tick,
                  'dead_golds', dead_golds,
                  'gold_delta', (next_hero.gold - cur_hero.gold))
            gold_delta = 0
    # if dead_golds > 0:
    #     print('dead_gold', dead_golds, 'delta_gold', gold_delta, "hero", hero_name, "tick", cur_state.tick)
    reward = float(gold_delta) / 100

    # Scale all rewards down and clip to [-1, 1]
    final_reward = reward / 100
    final_reward = min(max(final_reward, -1), 1)

    # Special rewards come last
    # A killing blow on the rival hero gets the maximum reward directly
    # (amplified because of the discount factor gamma)
    if cur_rival_hero.hp > 0 and next_rival_hero.hp <= 0:
        # print('lane rival hero %s died' % rival_hero_name)
        dmg_hit_rival = next_state.get_hero_total_dmg(hero_name, rival_hero_name)
        if dmg_hit_rival > 0:
            # print('hero %s dealt the last hit on the rival' % hero_name)
            final_reward = 1
        if cur_hero.hp > 0 and next_hero.hp <= 0:
            # Both heroes died in the same frame
            final_reward = 0
    elif cur_hero.hp > 0 and next_hero.hp <= 0:
        print('hero died')
        final_reward = -1
    return final_reward
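# Worked example of the gold-to-reward arithmetic above, as plain Python with no project
# imports. The helper name and the numbers are made up; only the scaling and clipping
# steps mirror the function.
def _reward_from_gold(gold_delta, dead_golds):
    # Strip the +3 periodic trickle, attribute gold beyond half of the dead creeps'
    # value to the hero, then scale by 1/100 twice and clip to [-1, 1].
    if gold_delta % 10 == 3 or gold_delta % 10 == 8 or gold_delta == int(dead_golds / 2) + 3:
        gold_delta -= 3
    if gold_delta > 0:
        gold_delta = max(gold_delta * 2 - dead_golds, 0)
    reward = float(gold_delta) / 100 / 100
    return min(max(reward, -1), 1)

print(_reward_from_gold(78, 100))   # 0.005: hero took 75 of 100 gold -> (75*2-100)/100/100
print(_reward_from_gold(3, 0))      # 0.0: only the periodic +3 trickle, no real gain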
def get_action(selected, state_info, hero, hero_name, rival_hero, revert=False):
    if selected < 8:
        # move
        fwd = StateUtil.mov(selected, revert)
        tgtpos = PosStateInfo(hero.pos.x + fwd.x * 15,
                              hero.pos.y + fwd.y * 15,
                              hero.pos.z + fwd.z * 15)
        action = CmdAction(hero_name, CmdActionEnum.MOVE, None, None, tgtpos, None, None, selected, None)
        return action
    elif selected < 18:
        # basic attack on the rival hero, the tower, or enemy creeps 1~8
        if selected == 8:
            # enemy tower
            tower = StateUtil.get_nearest_enemy_tower(state_info, hero_name, StateUtil.ATTACK_UNIT_RADIUS)
            tgtid = tower.unit_name
            action = CmdAction(hero_name, CmdActionEnum.ATTACK, 0, tgtid, None, None, None, selected, None)
            return action
        elif selected == 9:
            # enemy hero
            tgtid = rival_hero
            action = CmdAction(hero_name, CmdActionEnum.ATTACK, 0, tgtid, None, None, None, selected, None)
            return action
        else:
            # creeps
            creeps = StateUtil.get_nearby_enemy_units(state_info, hero_name)
            n = selected - 10
            tgtid = creeps[n].unit_name
            action = CmdAction(hero_name, CmdActionEnum.ATTACK, 0, tgtid, None, None, None, selected, None)
            return action
    elif selected < 48:
        # skill
        skillid = int((selected - 18) / 10 + 1)
        [tgtid, tgtpos] = LineModel.choose_skill_target(
            selected - 18 - (skillid - 1) * 10, state_info, skillid, hero_name, hero.pos, rival_hero)
        if tgtpos is None:
            fwd = None
        else:
            fwd = tgtpos.fwd(hero.pos)
        action = CmdAction(hero_name, CmdActionEnum.CAST, skillid, tgtid, tgtpos, fwd, None, selected, None)
        return action
    elif selected == 48:
        # hold
        # print("action 48 selected - hold")
        action = CmdAction(hero_name, CmdActionEnum.HOLD, None, None, hero.pos, None, None, 48, None)
        return action
    else:
        # retreat
        retreat_pos = StateUtil.get_retreat_pos(state_info, hero, line_index=1)
        action = CmdAction(hero_name, CmdActionEnum.RETREAT, None, None, retreat_pos, None, None, selected, None)
        return action
def upgrade_skills(self, state_info, hero_name):
    # If any skill can be upgraded, upgrade skill 3 first
    hero = state_info.get_hero(hero_name)
    skills = StateUtil.get_skills_can_upgrade(hero)
    if len(skills) > 0:
        skillid = 3 if 3 in skills else skills[0]
        update_cmd = CmdAction(hero.hero_name, CmdActionEnum.UPDATE, skillid, None, None, None, None, None, None)
        update_str = StateUtil.build_command(update_cmd)
        return update_str
def if_hero_leave_line(state_infos, state_idx, hero_name, line_index):
    if state_idx > 0:
        prev_state = state_infos[state_idx - 1]
        cur_state = state_infos[state_idx]

        # Punish the hero for straying too far from the lane
        prev_hero = prev_state.get_hero(hero_name)
        cur_hero = cur_state.get_hero(hero_name)
        prev_in_line = StateUtil.if_in_line(prev_hero, line_index, 4000)
        cur_in_line = StateUtil.if_in_line(cur_hero, line_index, 4000)
        if prev_in_line >= 0 and cur_in_line == -1:
            return True
    return False
def find_skill_targets(state_info, attacker_info, tgt_pos, skill_length, skill_width, is_circle):
    if not is_circle:
        # The straight-line distance must first be within the skill length
        tgt_unit_list = [
            unit for unit in state_info.units
            if StateUtil.cal_distance2(attacker_info.pos, unit.pos) <= skill_length
            and not StateUtil.if_unit_tower(unit.unit_name)
        ]
        tgt_hero_list = [
            hero for hero in state_info.heros
            if StateUtil.cal_distance2(attacker_info.pos, hero.pos) <= skill_length
            and hero.hero_name != attacker_info.hero_name
        ]

        # Either rotate the coordinate system (x1 = x*cos(a) + y*sin(a), y1 = y*cos(a) - x*sin(a)),
        # or, as done here, interpolate the expected z value proportionally
        # (it plays the role of x in the rotated coordinate system)
        tgt_units = []
        for unit in tgt_unit_list:
            mid_x = attacker_info.pos.z + StateUtil.cal_distance2(attacker_info.pos, unit.pos) \
                / StateUtil.cal_distance2(attacker_info.pos, tgt_pos) * (tgt_pos.z - attacker_info.pos.z)
            if mid_x - skill_width / 2 <= unit.pos.z <= mid_x + skill_width / 2:
                tgt_units.append(unit)
        tgt_heros = []
        for hero in tgt_hero_list:
            mid_x = attacker_info.pos.z + StateUtil.cal_distance2(attacker_info.pos, hero.pos) \
                / StateUtil.cal_distance2(attacker_info.pos, tgt_pos) * (tgt_pos.z - attacker_info.pos.z)
            if mid_x - skill_width / 2 <= hero.pos.z <= mid_x + skill_width / 2:
                tgt_heros.append(hero)
        return tgt_units, tgt_heros
    else:
        # Draw a circle around tgt_pos and collect the enemies inside it
        # Towers are never valid skill targets
        tgt_unit_list = [
            unit for unit in state_info.units
            if StateUtil.cal_distance2(tgt_pos, unit.pos) <= skill_length
            and not StateUtil.if_unit_tower(unit.unit_name)
        ]
        tgt_hero_list = [
            hero for hero in state_info.heros
            if StateUtil.cal_distance2(tgt_pos, hero.pos) <= skill_length
            and hero.hero_name != attacker_info.hero_name
        ]
        return tgt_unit_list, tgt_hero_list
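# Illustration only: a self-contained 2D version of the "rectangle along the cast direction"
# test that the line-skill branch above approximates. The helper name and the plain (x, z)
# tuples are assumptions for the sketch, not part of the project's data model.
import math

def _in_line_skill_area(attacker, tgt, point, skill_length, skill_width):
    # Unit vector from the attacker towards the cast position
    dx, dz = tgt[0] - attacker[0], tgt[1] - attacker[1]
    norm = math.hypot(dx, dz)
    if norm == 0:
        return False
    ux, uz = dx / norm, dz / norm
    # Coordinates of the point in the rotated frame: along the cast axis / across it
    px, pz = point[0] - attacker[0], point[1] - attacker[1]
    along = px * ux + pz * uz
    across = abs(-px * uz + pz * ux)
    return 0 <= along <= skill_length and across <= skill_width / 2

print(_in_line_skill_area((0, 0), (10, 0), (5, 1), 10, 4))   # True: inside the strip
print(_in_line_skill_area((0, 0), (10, 0), (5, 3), 10, 4))   # False: too far sideways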
def if_hero_attack_opponent(hero_action):
    if hero_action is None:
        return False
    if hero_action.tgtid is not None and StateUtil.if_unit_hero(hero_action.tgtid):
        return True
    return False
def gen_input_hero(self, hero, rival_towers):
    if hero.state == 'out' or hero.hp <= 0:
        return list(np.zeros(13 + 3 * 19))

    dis_rival = 10000
    if len(rival_towers) > 0:
        dis_list = [StateUtil.cal_distance2(hero.pos, t.pos) for t in rival_towers]
        dis_rival = min(dis_list)
    hero_input = [self.normalize_value(int(hero.hero_name)),
                  self.normalize_value(hero.pos.x),
                  self.normalize_value(hero.pos.z),
                  self.normalize_value(hero.speed),
                  self.normalize_value(hero.att),
                  # todo: 2 is the basic-attack range; this only fits heroes 1 and 2,
                  # other heroes may have a different range
                  0.2,
                  self.normalize_value(hero.mag),
                  self.normalize_value(hero.hp),
                  hero.hp / float(hero.maxhp),
                  self.normalize_value(hero.mp),
                  self.normalize_value(dis_rival),
                  hero.team]
    is_enemy_visible = hero.is_enemy_visible()
    hero_input.append(int(is_enemy_visible))

    skill_info1 = SkillUtil.get_skill_info(hero.cfg_id, 1)
    skill_info2 = SkillUtil.get_skill_info(hero.cfg_id, 2)
    skill_info3 = SkillUtil.get_skill_info(hero.cfg_id, 3)
    skill_input1 = self.gen_input_skill(skill_info1, hero.skills[1])
    skill_input2 = self.gen_input_skill(skill_info2, hero.skills[2])
    skill_input3 = self.gen_input_skill(skill_info3, hero.skills[3])
    hero_input = hero_input + skill_input1 + skill_input2 + skill_input3
    return hero_input
def find_next_tgt(state_info, unit, soldier_list):
    # To save computation we only look for a victim among the creeps near the hero,
    # or the hero itself
    heros = StateUtil.get_heros_in_team(state_info, 1 - unit.team_id)
    hero = heros[0]
    min_dis = StateUtil.cal_distance2(hero.pos, unit.pos)
    tgt = hero.hero_name
    for soldier in soldier_list:
        dis = StateUtil.cal_distance2(soldier.pos, unit.pos)
        if dis < min_dis:
            tgt = soldier.unit_name
            min_dis = dis
    if min_dis <= StateUtil.TOWER_ATTACK_RADIUS * 1000:
        return tgt
    return None
def play_unit_action(state_info, unit, hero_info, hero_action, near_enemy_units):
    # TODO the range of creeps that switch to attacking the hero still needs to be defined
    # Highest priority: if the hero attacked the rival hero, nearby creeps attack that hero first
    # For the attack range we currently use the tower's attack range as the default
    if PlayEngine.if_hero_attack_opponent(hero_action) \
            and StateUtil.cal_distance(hero_info.pos, unit.pos) <= StateUtil.TOWER_ATTACK_RADIUS:
        state_info = PlayEngine.play_attack(state_info, unit.unit_name, hero_action.hero_name)
    # Otherwise act according to the existing attack info
    else:
        find_new_tgt = False
        att = state_info.get_attack_info(unit.unit_name)
        if att is not None:
            # Check whether the current victim is already dead
            defender = state_info.get_unit(att.defer)
            if defender is None or defender.hp <= 0:
                find_new_tgt = True
            else:
                state_info = PlayEngine.play_attack(state_info, unit.unit_name, att.defer)

        # If the target was lost, attack the nearest enemy
        # To save computation we only look among the creeps near the hero, or the hero itself
        if att is None or find_new_tgt:
            tgt = PlayEngine.find_next_tgt(state_info, unit, near_enemy_units)
            if tgt is not None:
                state_info = PlayEngine.play_attack(state_info, unit.unit_name, tgt)
    return state_info
def gen_input_building(self, building, query_hero=None, state_info=None, hero_name=None, revert=False):
    if building is None:
        building_info = np.zeros(8)
        building_info = list(building_info)
    else:
        hero_info = state_info.get_hero(hero_name)
        building_info = [
            self.normalize_value(building.pos.x - query_hero.pos.x if not revert
                                 else -(building.pos.x - query_hero.pos.x)),
            self.normalize_value(building.pos.z - query_hero.pos.z if not revert
                                 else -(building.pos.z - query_hero.pos.z)),
            self.normalize_value(building.att),
            self.normalize_value(7000),
            self.normalize_value(building.hp),
            self.normalize_value(StateUtil.cal_distance2(building.pos, hero_info.pos)),
            building.team if not revert else 1 - building.team
        ]
        # Flag whether the building is currently attacking this hero
        attack_info = state_info.if_unit_attack_hero(building.unit_name, hero_name)
        if attack_info is None:
            building_info.append(0)
        else:
            building_info.append(1)
    return building_info
def units_in_tower_range(units, target_pos):
    num = 0
    for unit in units:
        if StateUtil.cal_distance(unit.pos, target_pos) <= StateUtil.TOWER_ATTACK_RADIUS:
            num += 1
    return num
def cal_state_log_action_reward(state_path, output_path):
    state_file = open(state_path, "r")
    output = open(output_path, 'w')
    lines = state_file.readlines()
    state_logs = []
    prev_state = None
    for line in lines:
        cur_state = StateUtil.parse_state_log(line)
        # A tick equal to TICK_PER_STATE or a non-increasing tick means the log restarted,
        # so the cached previous frame is dropped
        if cur_state.tick == StateUtil.TICK_PER_STATE:
            print("clear")
            prev_state = None
        elif prev_state is not None and prev_state.tick >= cur_state.tick:
            print("clear")
            prev_state = None
        if prev_state is not None:
            state_logs.append(prev_state)
        prev_state = cur_state
    if prev_state is not None:
        state_logs.append(prev_state)

    # Compute the reward values
    state_logs_with_reward = LineModel.update_rewards(state_logs)
    for state_with_reward in state_logs_with_reward:
        # Write the result to the output file
        state_encode = state_with_reward.encode()
        state_json = JSON.dumps(state_encode)
        output.write(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " -- " + state_json + "\n")
        output.flush()
    print(len(state_logs))
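# Standalone illustration of the tick-reset filtering used above (the same pattern appears in
# guess_action_cal_reward and replay_battle_log below). The helper name, the default
# tick_per_state value and the sample ticks are made up; only the clearing logic mirrors the code.
def _filter_ticks(ticks, tick_per_state=200):
    kept, prev = [], None
    for tick in ticks:
        if tick == tick_per_state or (prev is not None and prev >= tick):
            prev = None          # a restart was detected, drop the cached frame
        if prev is not None:
            kept.append(prev)
        prev = tick
    if prev is not None:
        kept.append(prev)
    return kept

print(_filter_ticks([200, 400, 600, 200, 400]))   # [200, 400, 200, 400]: the frame before the restart (600) is dropped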
def play_step(state_info, heros, hero_actions):
    # Basic rule: if a hero attacked the rival hero, nearby creeps attack that hero first
    # Execute the hero actions
    for action in hero_actions:
        hero_info = state_info.get_hero(action.hero_name)
        PlayEngine.play_hero_action(state_info, action, hero_info)

    # Only creeps close to a hero are simulated
    played_units = []
    for hero_name in heros:
        hero_info = state_info.get_hero(hero_name)
        hero_action = PlayEngine.find_hero_action(hero_actions, hero_name)
        near_enemy_units = StateUtil.get_nearby_enemy_units(state_info, hero_name, StateUtil.LINE_MODEL_RADIUS)
        near_friend_units = StateUtil.get_nearby_friend_units(state_info, hero_name, StateUtil.LINE_MODEL_RADIUS)

        # Execute the creep actions
        for unit in near_enemy_units:
            if unit.unit_name not in played_units:
                played_units.append(unit.unit_name)
                state_info = PlayEngine.play_unit_action(state_info, unit, hero_info, hero_action, near_friend_units)
        for unit in near_friend_units:
            if unit.unit_name not in played_units:
                played_units.append(unit.unit_name)
                state_info = PlayEngine.play_unit_action(state_info, unit, hero_info, hero_action, near_enemy_units)

        # Execute the tower action, searching in a slightly larger radius
        nearest_enemy_tower = StateUtil.get_nearest_enemy_tower(state_info, hero_name,
                                                                StateUtil.LINE_MODEL_RADIUS + 5)
        if nearest_enemy_tower is not None and nearest_enemy_tower.unit_name not in played_units:
            played_units.append(nearest_enemy_tower.unit_name)
            state_info = PlayEngine.play_unit_action(state_info, nearest_enemy_tower, hero_info,
                                                     hero_action, near_friend_units)

        # Update buff information
        state_info = BuffInfo.update_unit_buffs(state_info, hero_name)
    return state_info
def if_hit_by_tower(state_infos, state_idx, state_num, hero_name):
    for i in range(state_num):
        # the hit lands with a delay, so look at the following frames
        state_info = state_infos[state_idx + i + 1]
        hit_names = state_info.get_hero_be_attacked_info(hero_name)
        for unit_name in hit_names:
            if StateUtil.if_unit_tower(unit_name):
                return True
    return False
def assert_tower_in_input(cur_state, hero_name, rival_hero):
    # If an enemy tower is attacking the hero, check that the tower's info made it into the model input
    att_info = cur_state.if_tower_attack_hero(hero_name)
    if att_info is not None:
        tower = str(att_info.atker)
        tower_info = cur_state.get_obj(tower)
        hero_info = cur_state.get_hero(hero_name)
        model_input = LineModel_PPO1.gen_input(cur_state, hero_name, rival_hero)
        if model_input[44] == Line_Input_Lite.normalize_value_static(int(tower)):
            print('yes found attack tower in input', tower,
                  'distance', model_input[50],
                  'cal_distance', StateUtil.cal_distance2(tower_info.pos, hero_info.pos))
        else:
            print('not found attack tower in input', tower,
                  'distance', model_input[50],
                  'cal_distance', StateUtil.cal_distance2(tower_info.pos, hero_info.pos))
def choose_skill_target(selected, state_info, skill_info, hero_name, pos, tgt_hero_name, debug=False):
    hero_info = state_info.get_hero(hero_name)
    if selected == 0:
        # Cast on self
        # First check whether the skill can only target enemy heroes
        if skill_info.cast_target == SkillTargetEnum.self and hero_name != str(tgt_hero_name):
            if debug:
                print("cast target is self, but the given target is someone else")
            return [-1, None]
        tgtid = hero_name
        # TODO this is a bit problematic: for self-targeting skills we may still need to
        # distinguish the intended direction, otherwise the fwd computation breaks
        tgtpos = None
    if selected <= 4:
        # Attack the rival hero
        tgt_hero = state_info.get_hero(tgt_hero_name)
        if tgt_hero.team != hero_info.team and not tgt_hero.is_enemy_visible():
            if debug:
                print("the rival hero is not visible")
            tgtid = -1
            tgtpos = None
        elif StateUtil.cal_distance(tgt_hero.pos, pos) > skill_info.cast_distance:
            if debug:
                print("the skill cannot reach the rival %s %s %s" %
                      (tgt_hero_name, StateUtil.cal_distance(tgt_hero.pos, pos), skill_info.cast_distance))
            tgtid = 0
            tgtpos = None
        # Ignore the rival hero as a target when it is already dead
        elif tgt_hero.hp <= 0:
            if debug:
                print("the skill cannot hit the rival, it is already dead")
            tgtid = -1
            tgtpos = None
        else:
            tgtid = tgt_hero_name
            tgtpos = tgt_hero.pos
    return tgtid, tgtpos
def guess_action_cal_reward(state_path, output_path):
    state_file = open(state_path, "r")
    output = open(output_path, 'w')
    lines = state_file.readlines()
    state_logs = []
    prev_state = None
    for line in lines:
        cur_state = StateUtil.parse_state_log(line)
        if cur_state.tick == StateUtil.TICK_PER_STATE:
            print("clear")
            prev_state = None
        elif prev_state is not None and prev_state.tick >= cur_state.tick:
            print("clear")
            prev_state = None
        if prev_state is not None:
            state_logs.append(prev_state)
        prev_state = cur_state
    if prev_state is not None:
        state_logs.append(prev_state)

    # Guess the player actions
    for idx in range(1, len(state_logs) - 1):
        prev_state = state_logs[idx - 1]
        cur_state = state_logs[idx]
        next_state = state_logs[idx + 1]
        if cur_state.tick >= 55044:
            db = 1  # appears to be a debug hook
        hero = prev_state.get_hero("27")
        line_index = 1
        near_enemy_heroes = StateUtil.get_nearby_enemy_heros(prev_state, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        near_enemy_units = StateUtil.get_nearby_enemy_units(prev_state, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        nearest_enemy_tower = StateUtil.get_nearest_enemy_tower(prev_state, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        near_enemy_units_in_line = StateUtil.get_units_in_line(near_enemy_units, line_index)
        nearest_enemy_tower_in_line = StateUtil.get_units_in_line([nearest_enemy_tower], line_index)
        if len(near_enemy_heroes) != 0 or len(near_enemy_units_in_line) != 0 or len(nearest_enemy_tower_in_line) != 0:
            player_action = Replayer.guess_player_action(prev_state, cur_state, next_state, "27", "28")
            action_str = StateUtil.build_command(player_action)
            print('player action analysis: ' + str(action_str) + ' tick:' + str(prev_state.tick)
                  + ' prev_pos: ' + hero.pos.to_string()
                  + ', cur_pos: ' + cur_state.get_hero(hero.hero_name).pos.to_string())
            prev_state.add_action(player_action)

    # Compute the reward values
    state_logs_with_reward = LineModel.update_rewards(state_logs)
    for state_with_reward in state_logs_with_reward:
        # Write the result to the output file
        state_encode = state_with_reward.encode()
        state_json = JSON.dumps(state_encode)
        output.write(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " -- " + state_json + "\n")
        output.flush()
    print(len(state_logs))
def replay_battle_log(log_path, state_path, hero_names, model_path=None, save_model_path=None):
    path = log_path
    file = open(path, "r")
    state_file = open(state_path, 'w')
    lines = file.readlines()
    state_logs = []
    prev_state = None
    model = LineModel(279, 48, hero_names)
    if model_path is not None:
        model.load(model_path)
    if save_model_path is not None:
        model.save(save_model_path)
    line_trainer = LineTrainer(hero_names)
    for line in lines:
        if prev_state is not None and int(prev_state.tick) > 248556:
            i = 1  # appears to be a debug hook
        cur_state = StateUtil.parse_state_log(line)
        if cur_state.tick == StateUtil.TICK_PER_STATE:
            print("clear")
            prev_state = None
        elif prev_state is not None and prev_state.tick >= cur_state.tick:
            print("clear")
            prev_state = None
        state_info = StateUtil.update_state_log(prev_state, cur_state)

        # Exercise the laning model
        rsp_str = line_trainer.build_response(state_info, prev_state, model, hero_names)
        print(rsp_str)
        prev_state = state_info
        state_logs.append(state_info)

    # Record the states to the output file
    for state in state_logs:
        state_encode = state.encode()
        state_json = JSON.dumps(state_encode)
        state_file.write(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " -- " + state_json + "\n")
        state_file.flush()
    print(len(state_logs))
def policy_move_retreat(hero_info):
    if hero_info.team == 0:
        mov_idx = 6
    else:
        mov_idx = 0
    fwd = StateUtil.mov(mov_idx)
    tgtpos = PosStateInfo(hero_info.pos.x + fwd.x * 15,
                          hero_info.pos.y + fwd.y * 15,
                          hero_info.pos.z + fwd.z * 15)
    action = CmdAction(hero_info.hero_name, CmdActionEnum.MOVE, None, None, tgtpos, None, None, mov_idx, None)
    return action
def guess_hero_actions(self, state_index, real_heros=None):
    prev_state = self.state_cache[state_index - 1]
    cur_state = self.state_cache[state_index]
    next_state = self.state_cache[state_index + 1]

    # If needed, fill in the actions of the human players for this frame
    if real_heros is not None:
        for hero_name in real_heros:
            hero_action = Replayer.guess_player_action(prev_state, cur_state, next_state, hero_name, '28')
            cur_state.add_action(hero_action)
            action_str = StateUtil.build_command(hero_action)
            print('player action analysis: ' + str(action_str) + ' tick:' + str(cur_state.tick))
def policy_attack_rival_unit(hero_info, rival_hero_info, state_info, hero_name,
                             rival_near_units, rival_near_tower, near_friend_units):
    # If no enemy hero is nearby, we are not under a tower, and friendly creeps are around,
    # attack the enemy creeps
    if (rival_hero_info.hp <= 0 or StateUtil.cal_distance(hero_info.pos, rival_hero_info.pos)
            >= LineTrainerPolicy.SAFE_RIVAL_HERO_DISTANCE) and \
            rival_near_tower is None and len(near_friend_units) > 0:
        # Prefer last-hitting creeps that are almost dead
        for unit in rival_near_units:
            if unit.hp <= hero_info.att - 20:
                action = LineTrainerPolicy.get_attack_unit_action(state_info, hero_name, unit.unit_name, 0)
                print("policy: no enemy hero nearby and not under the tower, last-hit the creep " + hero_name)
                return action

        # If an enemy creep is attacking us, retreat behind our own creeps
        for unit in rival_near_units:
            att = state_info.if_unit_attack_hero(unit.unit_name, hero_name)
            if att is not None:
                retreat = LineTrainerPolicy.policy_move_retreat(hero_info)
                print("policy: retreat while being attacked by creeps " + hero_name)
                return retreat

        # Basic (physical) attacks: skip low-hp creeps (leave them for last hits)
        # and prefer creeps that are closer to our own tower
        rival_near_units_sorted = list(rival_near_units)
        basement_pos = StateUtil.get_basement(hero_info)
        rival_near_units_sorted.sort(key=lambda u: math.fabs(basement_pos.x - u.pos.x), reverse=False)
        for unit in rival_near_units_sorted:
            if unit.hp > hero_info.att * 3:
                action = LineTrainerPolicy.get_attack_unit_action(state_info, hero_name, unit.unit_name, 0)
                print("policy: no enemy hero nearby and not under the tower, attack the enemy creep " + hero_name)
                return action
    return None
def get_attack_unit_action(state_info, hero_name, unit_name, skill_id):
    creeps = StateUtil.get_nearby_enemy_units(state_info, hero_name)
    unit_idx = [c.unit_name for c in creeps].index(unit_name)
    # Flat action index used by the model: 10~17 are basic attacks on creeps,
    # 20~27 / 30~37 / 40~47 are skills 1~3 on those creeps
    action_idx = unit_idx + 10 * skill_id + 10
    if skill_id == 0:
        # basic attack on the creep
        action = CmdAction(hero_name, CmdActionEnum.ATTACK, 0, unit_name, None, None, None, action_idx, None)
    else:
        # cast the skill at the creep's position
        tgtpos = creeps[unit_idx].pos
        hero = state_info.get_hero(hero_name)
        fwd = tgtpos.fwd(hero.pos)
        action = CmdAction(hero_name, CmdActionEnum.CAST, skill_id, unit_name, tgtpos, fwd, None, action_idx, None)
    return action
def use_skill3_correctly(state_info, hero_info, rival_hero_info, rival_near_units, action_ratios):
    if hero_info.cfg_id == '101':
        rival_hero_dis = StateUtil.cal_distance(hero_info.pos, rival_hero_info.pos)
        if rival_hero_dis > LineTrainerPolicy.SKILL_RANGE_CHAERSI_SKILL3:
            if LineTrainerPolicy.units_in_range(rival_near_units, hero_info.pos,
                                                LineTrainerPolicy.SKILL_RANGE_CHAERSI_SKILL3) == 0:
                print('policy', state_info.battleid, hero_info.hero_name,
                      "Chaersi's ultimate should not be used when nobody is in range")
                for i in range(38, 48):
                    action_ratios[i] = -1
    return action_ratios
def get_closest_fwd(fwd):
    # Among the 8 discrete move directions, pick the one whose cosine similarity
    # with the given forward vector is the largest
    maxcos = -1
    output_index = 0
    output_fwd = fwd
    for i in range(8):
        fwd1 = StateUtil.mov(i)
        a = fwd.x * fwd.x + fwd.z * fwd.z
        b = fwd1.x * fwd1.x + fwd1.z * fwd1.z
        cos = (fwd.x * fwd1.x + fwd.z * fwd1.z) / (math.sqrt(a) * math.sqrt(b))
        if cos > maxcos:
            maxcos = cos
            output_index = i
            output_fwd = fwd1
    return [output_fwd, output_index]
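# Self-contained illustration of the cosine pick above, using plain (x, z) tuples instead
# of the project's fwd objects. The assumption that the 8 directions sit at 45-degree steps
# is mine; the project defines them in StateUtil.mov.
import math

_DIRECTIONS = [(math.cos(math.radians(45 * i)), math.sin(math.radians(45 * i))) for i in range(8)]

def _closest_direction(x, z):
    # Return the index of the discrete direction with the highest cosine similarity
    norm = math.hypot(x, z)
    best_i, best_cos = 0, -1
    for i, (dx, dz) in enumerate(_DIRECTIONS):
        cos = (x * dx + z * dz) / norm    # directions are already unit length
        if cos > best_cos:
            best_i, best_cos = i, cos
    return best_i

print(_closest_direction(1.0, 0.1))    # 0: almost aligned with the first direction
print(_closest_direction(-1.0, -1.0))  # 5: points towards 225 degrees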
def gen_input_hero(self, hero, query_hero, rival_towers, revert=False):
    if hero.state == 'out' or hero.hp <= 0:
        return list(np.zeros(16 + 3 * 17))

    dis_rival = 10000
    if len(rival_towers) > 0:
        dis_list = [StateUtil.cal_distance2(hero.pos, t.pos) for t in rival_towers]
        dis_rival = min(dis_list)
    hero_input = [
        self.normalize_value(hero.pos.x - query_hero.pos.x if not revert
                             else -(hero.pos.x - query_hero.pos.x)),
        self.normalize_value(hero.pos.z - query_hero.pos.z if not revert
                             else -(hero.pos.z - query_hero.pos.z)),
        self.normalize_value(hero.speed),
        self.normalize_value(hero.att),
        self.normalize_value(hero.attspeed),
        self.normalize_value(hero.attpen),
        self.normalize_value(hero.attpenrate),
        # # todo: 2 is the basic-attack range; this only fits heroes 1 and 2,
        # # other heroes may have a different range
        # 0.2,
        self.normalize_value(hero.hp),
        hero.hp / float(hero.maxhp),
        self.normalize_value(hero.hprec),
        self.normalize_value(hero.mp),
        self.normalize_value(hero.mag),
        self.normalize_value(hero.magpen),
        self.normalize_value(hero.magpenrate),
        self.normalize_value(dis_rival),
        hero.team if not revert else 1 - hero.team
    ]
    # is_enemy_visible = hero.is_enemy_visible()
    # hero_input.append(int(is_enemy_visible))

    skill_info1 = SkillUtil.get_skill_info(hero.cfg_id, 1)
    skill_info2 = SkillUtil.get_skill_info(hero.cfg_id, 2)
    skill_info3 = SkillUtil.get_skill_info(hero.cfg_id, 3)
    skill_input1 = self.gen_input_skill(skill_info1, hero.skills[1])
    skill_input2 = self.gen_input_skill(skill_info2, hero.skills[2])
    skill_input3 = self.gen_input_skill(skill_info3, hero.skills[3])
    hero_input = hero_input + skill_input1 + skill_input2 + skill_input3
    return hero_input
def keep_away_from(state_info, hero_info, rival_hero_info, action_ratios, danger_pos, danger_radius):
    maxQ = max(action_ratios)
    if maxQ == -1:
        return action_ratios
    for selected in range(len(action_ratios)):
        if action_ratios[selected] == -1:
            continue
        if selected < 8:
            # moves: forbid directions that would take the hero into the danger area
            fwd = StateUtil.mov(selected)
            tgtpos = PosStateInfo(hero_info.pos.x + fwd.x * 0.5,
                                  hero_info.pos.y + fwd.y * 0.5,
                                  hero_info.pos.z + fwd.z * 0.5)
            if StateUtil.cal_distance(tgtpos, danger_pos) <= danger_radius:
                print('policy', state_info.battleid, hero_info.hero_name,
                      'the move direction would enter the danger area',
                      hero_info.pos.to_string(), tgtpos.to_string())
                action_ratios[selected] = -1
        elif selected < 18:
            # basic attacks on the rival hero, the tower or creeps 1~8;
            # these checks are aimed at melee heroes
            if selected == 8:
                # enemy tower
                print('policy', state_info.battleid, hero_info.hero_name, 'do not attack the tower')
                action_ratios[selected] = -1
            elif selected == 9:
                # enemy hero
                if StateUtil.cal_distance(rival_hero_info.pos, danger_pos) <= danger_radius:
                    print('policy', state_info.battleid, hero_info.hero_name,
                          'do not melee the hero inside the tower range')
                    action_ratios[selected] = -1
            else:
                # creeps
                creeps = StateUtil.get_nearby_enemy_units(state_info, hero_info.hero_name)
                n = selected - 10
                tgt = creeps[n]
                if StateUtil.cal_distance(tgt.pos, danger_pos) <= danger_radius:
                    print('policy', state_info.battleid, hero_info.hero_name,
                          'do not melee the creep inside the tower range')
                    action_ratios[selected] = -1
        elif hero_info.cfg_id == '101' and 28 <= selected < 38:
            # specific check for Chaersi's leap skill
            skillid = int((selected - 18) / 10 + 1)
            [tgtid, tgtpos] = LineModel.choose_skill_target(
                selected - 18 - (skillid - 1) * 10, state_info, skillid,
                hero_info.hero_name, hero_info.pos, rival_hero_info.hero_name)
            if tgtpos is not None:
                if StateUtil.cal_distance(tgtpos, danger_pos) <= danger_radius:
                    print('policy', state_info.battleid, hero_info.hero_name,
                          'the leap skill targets a position under the tower')
                    action_ratios[selected] = -1
    return action_ratios
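# Minimal illustration of the masking convention above: actions ruled out by the policy layer
# are set to -1, and the caller is assumed to pick the remaining maximum. The ratios and the
# forbidden index below are made-up numbers.
_demo_ratios = [0.10, 0.40, 0.25, 0.80, 0.05]
for i in [3]:                       # e.g. a move that would enter the tower range
    _demo_ratios[i] = -1
_best = _demo_ratios.index(max(_demo_ratios))
print(_best, _demo_ratios)          # 1 [0.1, 0.4, 0.25, -1, 0.05] -> action 3 is no longer chosen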
def search_team_battle_hero(self, state_info, hero):
    # Check whether a team fight is going on and collect every unit within its range
    # Definition of the team-fight range:
    # start from one hero; if there are enemies around it, add both the enemies and our own
    # heroes to the set, then keep expanding from the newly added heroes
    # Note: only one team-fight cluster is found here
    checked_heros = set()
    team_battle_heros = set()

    # Start from the given hero and look for enemies around it
    team_battle_heros.add(hero)
    while len(checked_heros) < len(team_battle_heros):
        for hero in team_battle_heros.copy():
            if hero not in checked_heros:
                near_enemy_heroes = StateUtil.get_nearby_enemy_heros(state_info, hero, TeamBattleTrainer.MODEL_RANGE)
                for enemy in near_enemy_heroes:
                    team_battle_heros.add(enemy.hero_name)
                checked_heros.add(hero)
    return team_battle_heros
def gen_input_building(self, building, state_info=None, hero_name=None):
    if building is None:
        building_info = np.zeros(9)
        building_info = list(building_info)
    else:
        hero_info = state_info.get_hero(hero_name)
        building_info = [self.normalize_value(int(building.unit_name)),
                         self.normalize_value(building.pos.x),
                         self.normalize_value(building.pos.z),
                         self.normalize_value(building.att),
                         self.normalize_value(7000),
                         self.normalize_value(building.hp),
                         self.normalize_value(StateUtil.cal_distance2(building.pos, hero_info.pos)),
                         building.team]
        # Flag whether the building is currently attacking this hero
        attack_info = state_info.if_unit_attack_hero(building.unit_name, hero_name)
        if attack_info is None:
            building_info.append(0)
        else:
            building_info.append(1)
    return building_info