Beispiel #1
0
    def if_restart(self, state_infos, state_index):
        """Decide whether the current game must be ended and restarted.

        The two tracked heroes (``model1_hero`` then ``model2_hero``) are
        examined in order.  A restart is requested as soon as one of them has
        accumulated two deaths, or when the first-middle-tower check is
        ``None`` for the current state and not ``None`` for the next one.

        Side effects: the per-model death counters are increased by the value
        returned from ``StateUtil.if_hero_dead``; the matching ``*_just_dead``
        flag is set to 1 when that value is 1; both counters are reset to 0
        when a restart is requested.

        :param state_infos: sequence of states; ``state_index + 1`` must exist.
        :param state_index: index of the current state.
        :return: ``(1, loss_team)`` when the game must restart, otherwise
            ``(0, -1)``.  ``loss_team`` is the dying hero's team, or the value
            returned by the tower check on the next state.
        """
        cur_state = state_infos[state_index]
        following_state = state_infos[state_index + 1]

        for hero_name in (self.model1_hero, self.model2_hero):
            hero_info = cur_state.get_hero(hero_name)
            died = StateUtil.if_hero_dead(cur_state, following_state, hero_name)

            # Book-keeping is kept per model; anything that is not model1's
            # hero is accounted to model2.
            if hero_name == self.model1_hero:
                self.model1_total_death += died
                deaths = self.model1_total_death
                if died == 1:
                    self.model1_just_dead = 1
            else:
                self.model2_total_death += died
                deaths = self.model2_total_death
                if died == 1:
                    self.model2_just_dead = 1

            tower_before = StateUtil.if_first_tower_destroyed_in_middle_line(cur_state)
            tower_after = StateUtil.if_first_tower_destroyed_in_middle_line(following_state)
            tower_just_fell = tower_before is None and tower_after is not None

            if not (deaths >= 2 or tower_just_fell):
                continue

            # This is the only place where the current game is ended.
            print('battle_id', cur_state.tick, '重开游戏')
            loser = hero_info.team if deaths >= 2 else tower_after
            self.model1_total_death = 0
            self.model2_total_death = 0
            return 1, loser

        return 0, -1
Beispiel #2
0
def train_line_model(state_path, model_path, scope, output_model_path, heros):
    """Train a ``LineModel_DQN`` offline from a recorded state log.

    Every log line is parsed into a state.  A state is used as a training
    sample only when it carries at least one action and every action has a
    reward, and when it has both a predecessor and a successor line.

    :param state_path: path of the state log, one serialized state per line.
    :param model_path: optional path of an existing model to load first.
    :param scope: passed through to ``LineModel_DQN`` as ``scope``.
    :param output_model_path: prefix of the checkpoint directories; the
        current memory size is appended to it.
    :param heros: hero names handed to the model constructor.
    """
    # Read the whole log up front so the handle is closed deterministically.
    with open(state_path, "r") as state_file:
        lines = state_file.readlines()

    model = LineModel_DQN(279, 48, heros, scope=scope)
    if model_path is not None:
        model.load(model_path)

    last_idx = len(lines) - 1
    for idx, line in enumerate(lines):
        # A sample needs a real previous and next frame.  Without this guard
        # idx 0 silently used the LAST line as "previous" (negative index) and
        # the final line raised IndexError.
        if idx == 0 or idx == last_idx:
            continue

        state_info = StateUtil.parse_state_log(line)
        if len(state_info.actions) == 0:
            continue
        # Skip trailing frames whose actions have not been rewarded yet.
        if any(action.reward is None for action in state_info.actions):
            continue

        prev_state_info = StateUtil.parse_state_log(lines[idx - 1])
        next_state_info = StateUtil.parse_state_log(lines[idx + 1])
        model.get_action(prev_state_info, state_info, '27', '28')

        added = model.remember(prev_state_info, state_info, next_state_info)
        if not added:
            continue

        # The act counter has to be advanced manually in offline training.
        model.act_times += 1
        model1_memory_len = model.get_memory_size()
        if model.if_replay(64):
            model.replay(64)
            # Checkpoint every 1000 remembered samples.
            if model1_memory_len > 0 and model1_memory_len % 1000 == 0:
                save_dir = output_model_path + str(model.get_memory_size())
                # Tolerate a checkpoint directory left over from an earlier run.
                if not os.path.isdir(save_dir):
                    os.makedirs(save_dir)
                model.save(save_dir + '/model')
Beispiel #3
0
    def cal_target_ppo_2(prev_state, cur_state, next_state, hero_name,
                         rival_hero_name, line_idx):
        """Compute the single-frame PPO reward for ``hero_name``.

        The base reward comes from the gold the hero gained between
        ``cur_state`` and ``next_state``, scaled down and clamped to
        ``[-1, 1]``.  It is then overridden by the special cases at the end:
        the rival dying while this hero has dealt damage to it, or this hero
        dying.

        :param prev_state: unused; kept so the signature matches callers.
        :param cur_state: state of the frame being rewarded.
        :param next_state: state of the following frame.
        :param hero_name: name of the hero being rewarded.
        :param rival_hero_name: name of the opposing hero.
        :param line_idx: lane index forwarded to
            ``StateUtil.get_dead_units_in_line``.
        :return: reward in ``[-1, 1]``.
        """
        LineModel_PPO1.assert_tower_in_input(cur_state, hero_name,
                                             rival_hero_name)

        # Each hero is looked up exactly once per state.
        cur_hero = cur_state.get_hero(hero_name)
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        next_hero = next_state.get_hero(hero_name)
        next_rival_hero = next_state.get_hero(rival_hero_name)
        rival_team = cur_rival_hero.team

        # Enemy units that died near the hero, and the gold they are worth.
        dead_units = StateUtil.get_dead_units_in_line(
            next_state, rival_team, line_idx, cur_hero,
            StateUtil.GOLD_GAIN_RADIUS)
        dead_golds = sum(
            StateUtil.get_unit_value(u.unit_name, u.cfg_id)
            for u in dead_units)

        # Ignore the small gold change: a delta ending in 3 or 8, or exactly
        # half of the dead units' gold plus 3, has 3 subtracted.
        # NOTE(review): the 3 looks like a periodic gold tick - confirm.
        raw_gold_delta = next_hero.gold - cur_hero.gold
        gold_delta = raw_gold_delta
        if gold_delta % 10 in (3, 8) or gold_delta == int(dead_golds / 2) + 3:
            gold_delta -= 3

        # Last hits are hard to detect directly, so only the gold beyond half
        # of the dead units' value is credited to the hero.
        if gold_delta > 0:
            gold_delta = gold_delta * 2 - dead_golds
            if gold_delta < 0:
                print('获得击杀金币不应该小于零', cur_state.tick, 'dead_golds', dead_golds,
                      'gold_delta', raw_gold_delta)
                gold_delta = 0

        reward = float(gold_delta) / 100

        # Scale every reward down once more, then clamp.
        final_reward = reward / 100
        final_reward = min(max(final_reward, -1), 1)

        # Special rewards come last so they override the gold-based value.
        if cur_rival_hero.hp > 0 and next_rival_hero.hp <= 0:
            # Rival died this frame: maximum reward if this hero damaged it,
            # cancelled to 0 when this hero died in the same frame.
            dmg_hit_rival = next_state.get_hero_total_dmg(
                hero_name, rival_hero_name)
            if dmg_hit_rival > 0:
                final_reward = 1
                if cur_hero.hp > 0 and next_hero.hp <= 0:
                    final_reward = 0
        elif cur_hero.hp > 0 and next_hero.hp <= 0:
            print('英雄死亡')
            final_reward = -1
        return final_reward
Beispiel #4
0
 def get_action(selected,
                state_info,
                hero,
                hero_name,
                rival_hero,
                revert=False):
     """Translate a discrete action index into a ``CmdAction``.

     Index layout, as implemented below:

     * ``0-7``   move 15 units along ``StateUtil.mov(selected, revert)``
     * ``8``     basic attack on the nearest enemy tower
     * ``9``     basic attack on ``rival_hero``
     * ``10-17`` basic attack on nearby enemy unit ``selected - 10``
     * ``18-47`` cast a skill (ten target slots per skill id)
     * ``48``    hold position
     * other     retreat on line index 1

     :param selected: discrete action index.
     :param state_info: current state.
     :param hero: hero info object; its ``pos`` is read.
     :param hero_name: name of the acting hero.
     :param rival_hero: name of the opposing hero.
     :param revert: forwarded to ``StateUtil.mov`` for move actions.
     :return: the ``CmdAction`` for the selected index.
     """
     # Movement.
     if selected < 8:
         direction = StateUtil.mov(selected, revert)
         destination = PosStateInfo(hero.pos.x + direction.x * 15,
                                    hero.pos.y + direction.y * 15,
                                    hero.pos.z + direction.z * 15)
         return CmdAction(hero_name, CmdActionEnum.MOVE, None, None,
                          destination, None, None, selected, None)

     # Basic attack; only the target differs between the three cases.
     if selected < 18:
         if selected == 8:
             target_name = StateUtil.get_nearest_enemy_tower(
                 state_info, hero_name,
                 StateUtil.ATTACK_UNIT_RADIUS).unit_name
         elif selected == 9:
             target_name = rival_hero
         else:
             nearby = StateUtil.get_nearby_enemy_units(state_info, hero_name)
             target_name = nearby[selected - 10].unit_name
         return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, target_name,
                          None, None, None, selected, None)

     # Skill cast: ten consecutive indices belong to one skill id.
     if selected < 48:
         skillid = int((selected - 18) / 10 + 1)
         target_slot = selected - 18 - (skillid - 1) * 10
         tgtid, tgtpos = LineModel.choose_skill_target(
             target_slot, state_info, skillid, hero_name, hero.pos,
             rival_hero)
         facing = None if tgtpos is None else tgtpos.fwd(hero.pos)
         return CmdAction(hero_name, CmdActionEnum.CAST, skillid, tgtid,
                          tgtpos, facing, None, selected, None)

     # Hold position.
     if selected == 48:
         return CmdAction(hero_name, CmdActionEnum.HOLD, None, None,
                          hero.pos, None, None, 48, None)

     # Everything else means retreat.
     fallback_pos = StateUtil.get_retreat_pos(state_info,
                                              hero,
                                              line_index=1)
     return CmdAction(hero_name, CmdActionEnum.RETREAT, None, None,
                      fallback_pos, None, None, selected, None)
Beispiel #5
0
 def upgrade_skills(self, state_info, hero_name):
     """Build the command string that upgrades one of the hero's skills.

     Skill 3 is preferred whenever it is upgradable; otherwise the first
     upgradable skill is taken.

     :param state_info: current state.
     :param hero_name: name of the hero to upgrade.
     :return: the command built by ``StateUtil.build_command``, or ``None``
         when no skill can be upgraded.
     """
     hero = state_info.get_hero(hero_name)
     upgradable = StateUtil.get_skills_can_upgrade(hero)
     if not len(upgradable) > 0:
         return None

     if 3 in upgradable:
         chosen = 3
     else:
         chosen = upgradable[0]
     command = CmdAction(hero.hero_name, CmdActionEnum.UPDATE, chosen,
                         None, None, None, None, None, None)
     return StateUtil.build_command(command)
Beispiel #6
0
    def if_hero_leave_line(state_infos, state_idx, hero_name, line_index):
        """Tell whether the hero left the lane between two consecutive states.

        ``StateUtil.if_in_line`` is evaluated (with a tolerance of 4000) for
        the hero in the previous and in the current state.  The hero counts
        as having left when the previous result is ``>= 0`` and the current
        one is ``-1``.

        :param state_infos: sequence of states.
        :param state_idx: index of the current state.
        :param hero_name: name of the hero to check.
        :param line_index: lane index forwarded to ``StateUtil.if_in_line``.
        :return: ``True`` when the hero just left the lane; ``False``
            otherwise, including for the first state.
        """
        # The first state has no predecessor to compare against.
        if not state_idx > 0:
            return False

        state_before = state_infos[state_idx - 1]
        state_now = state_infos[state_idx]

        # Straying too far from the lane is what gets penalised by callers.
        hero_before = state_before.get_hero(hero_name)
        hero_now = state_now.get_hero(hero_name)
        was_in_line = StateUtil.if_in_line(hero_before, line_index, 4000)
        is_in_line = StateUtil.if_in_line(hero_now, line_index, 4000)

        left_line = was_in_line >= 0 and is_in_line == -1
        return True if left_line else False
Beispiel #7
0
    def find_skill_targets(state_info, attacker_info, tgt_pos, skill_length,
                           skill_width, is_circle):
        """Collect the units and heroes a skill would hit.

        Circle skills hit everything within ``skill_length`` of ``tgt_pos``.
        Line skills first keep everything within ``skill_length`` of the
        attacker, then keep only targets lying inside a strip of
        ``skill_width`` around the cast line.  Towers and the attacker itself
        are never returned.

        :param state_info: state providing ``units`` and ``heros``.
        :param attacker_info: casting hero; ``pos`` and ``hero_name`` are read.
        :param tgt_pos: position the skill is aimed at.
        :param skill_length: reach of the skill (radius for circle skills).
        :param skill_width: full width of a line skill.
        :param is_circle: ``True`` for circular area skills.
        :return: ``(units, heros)`` - two lists.
        """
        # Both shapes start from the same distance filter; only the point the
        # distance is measured from differs.
        center = tgt_pos if is_circle else attacker_info.pos
        units_in_reach = [
            unit for unit in state_info.units
            if StateUtil.cal_distance2(center, unit.pos) <= skill_length
            and not StateUtil.if_unit_tower(unit.unit_name)
        ]
        heros_in_reach = [
            hero for hero in state_info.heros
            if StateUtil.cal_distance2(center, hero.pos) <= skill_length
            and hero.hero_name != attacker_info.hero_name
        ]
        if is_circle:
            return units_in_reach, heros_in_reach

        # Line skill: instead of rotating the coordinate system, walk along
        # the cast line proportionally to the target's distance and compare
        # the z coordinate found there with the target's own z.
        # NOTE(review): the previous code mixed `attacker_info.pos.z`, a
        # `tgt_pos - attacker_info` subtraction and `unit.x`, which cannot be
        # evaluated.  The z axis is used throughout here because the original
        # comment describes a z value - confirm the intended axis.
        origin = attacker_info.pos
        cast_distance = StateUtil.cal_distance2(origin, tgt_pos)
        delta_z = tgt_pos.z - origin.z
        half_width = skill_width / 2.0

        def _inside_strip(target):
            # Aiming at the caster's own position gives no direction; the
            # strip then stays centred on the caster instead of dividing by 0.
            if cast_distance == 0:
                offset = 0
            else:
                ratio = StateUtil.cal_distance2(
                    origin, target.pos) / float(cast_distance)
                offset = ratio * delta_z
            mid_z = origin.z + offset
            return mid_z - half_width <= target.pos.z <= mid_z + half_width

        tgt_units = [unit for unit in units_in_reach if _inside_strip(unit)]
        tgt_heros = [hero for hero in heros_in_reach if _inside_strip(hero)]
        return tgt_units, tgt_heros
Beispiel #8
0
 def if_hero_attack_opponent(hero_action):
     """Tell whether ``hero_action`` targets a hero.

     :param hero_action: action to inspect; may be ``None``.
     :return: ``True`` when the action has a ``tgtid`` that
         ``StateUtil.if_unit_hero`` accepts, ``False`` otherwise.
     """
     targets_hero = (hero_action is not None
                     and hero_action.tgtid is not None
                     and StateUtil.if_unit_hero(hero_action.tgtid))
     return True if targets_hero else False
Beispiel #9
0
    def gen_input_hero(self, hero, rival_towers):
        """Build the model input features describing one hero.

        A hero whose ``state`` is ``'out'`` or whose hp is not positive is
        encoded as ``13 + 3 * 19`` zeros.  Otherwise the result is thirteen
        hero features followed by the features of skills 1 to 3 as produced
        by ``gen_input_skill``.

        :param hero: hero info object.
        :param rival_towers: enemy towers; the distance to the closest one
            is a feature (10000 when the list is empty).
        :return: flat list of feature values.
        """
        if hero.state == 'out' or hero.hp <= 0:
            return list(np.zeros(13 + 3 * 19))

        # Distance to the closest rival tower, with a far-away default.
        nearest_tower_dis = 10000
        if len(rival_towers) > 0:
            nearest_tower_dis = min(
                StateUtil.cal_distance2(hero.pos, tower.pos)
                for tower in rival_towers)

        normalize = self.normalize_value
        features = [
            normalize(int(hero.hero_name)),
            normalize(hero.pos.x),
            normalize(hero.pos.z),
            normalize(hero.speed),
            normalize(hero.att),
            # TODO: 2 is the basic-attack reach and only fits heroes 1 and 2;
            # other heroes may have a different reach.
            0.2,
            normalize(hero.mag),
            normalize(hero.hp),
            hero.hp / float(hero.maxhp),
            normalize(hero.mp),
            normalize(nearest_tower_dis),
            hero.team,
        ]
        features.append(int(hero.is_enemy_visible()))

        # Look up all three skill configs first, then encode them in order.
        skill_infos = [SkillUtil.get_skill_info(hero.cfg_id, skill_id)
                       for skill_id in (1, 2, 3)]
        first_skill = self.gen_input_skill(skill_infos[0], hero.skills[1])
        second_skill = self.gen_input_skill(skill_infos[1], hero.skills[2])
        third_skill = self.gen_input_skill(skill_infos[2], hero.skills[3])
        return features + first_skill + second_skill + third_skill
Beispiel #10
0
    def find_next_tgt(state_info, unit, soldier_list):
        """Pick the closest enemy for ``unit`` to attack next.

        To keep the search cheap, only the first hero of the opposing team
        and the soldiers in ``soldier_list`` are considered.

        :param state_info: current state.
        :param unit: attacking unit; ``team_id`` and ``pos`` are read.
        :param soldier_list: candidate enemy soldiers.
        :return: name of the closest candidate when it is no further away
            than ``StateUtil.TOWER_ATTACK_RADIUS * 1000``, else ``None``.
        """
        enemy_heros = StateUtil.get_heros_in_team(state_info, 1 - unit.team_id)
        enemy_hero = enemy_heros[0]

        # Start with the enemy hero; a soldier only wins when strictly closer.
        closest_dis = StateUtil.cal_distance2(enemy_hero.pos, unit.pos)
        closest_name = enemy_hero.hero_name
        for candidate in soldier_list:
            candidate_dis = StateUtil.cal_distance2(candidate.pos, unit.pos)
            if not candidate_dis < closest_dis:
                continue
            closest_name = candidate.unit_name
            closest_dis = candidate_dis

        reachable = closest_dis <= StateUtil.TOWER_ATTACK_RADIUS * 1000
        return closest_name if reachable else None
Beispiel #11
0
    def play_unit_action(state_info, unit, hero_info, hero_action,
                         near_enemy_units):
        """Simulate one attack decision for a non-hero unit.

        Priority order:

        1. If the hero attacked an opposing hero and is within
           ``StateUtil.TOWER_ATTACK_RADIUS`` of ``unit``, the unit attacks
           that hero.
        2. Otherwise the unit keeps attacking its recorded target while that
           target exists and has hp left.
        3. Otherwise it attacks the target chosen by ``find_next_tgt``,
           if there is one.

        :param state_info: current state.
        :param unit: the acting unit.
        :param hero_info: hero whose action may draw aggro.
        :param hero_action: that hero's action; may be ``None``.
        :param near_enemy_units: candidates handed to ``find_next_tgt``.
        :return: the state after the unit's action.
        """
        # TODO: the range within which soldiers turn on the hero needs to be
        # defined; the tower attack range is used for now.
        hero_draws_aggro = (
            PlayEngine.if_hero_attack_opponent(hero_action)
            and StateUtil.cal_distance(hero_info.pos, unit.pos)
            <= StateUtil.TOWER_ATTACK_RADIUS)
        if hero_draws_aggro:
            return PlayEngine.play_attack(state_info, unit.unit_name,
                                          hero_action.hero_name)

        # Follow the recorded attack as long as its target is still alive.
        current_attack = state_info.get_attack_info(unit.unit_name)
        if current_attack is not None:
            defender = state_info.get_unit(current_attack.defer)
            if not (defender is None or defender.hp <= 0):
                return PlayEngine.play_attack(state_info, unit.unit_name,
                                              current_attack.defer)

        # No usable target left: go for the closest enemy among the hero and
        # the soldiers near it.
        new_target = PlayEngine.find_next_tgt(state_info, unit,
                                              near_enemy_units)
        if new_target is not None:
            return PlayEngine.play_attack(state_info, unit.unit_name,
                                          new_target)
        return state_info
Beispiel #12
0
 def gen_input_building(self,
                        building,
                        query_hero=None,
                        state_info=None,
                        hero_name=None,
                        revert=False):
     """Build the eight model input features describing one building.

     :param building: building info, or ``None`` for an empty slot.
     :param query_hero: hero the position offsets are relative to.
     :param state_info: current state; used for the hero lookup and the
         attack check.
     :param hero_name: hero used for the distance and the attack flag.
     :param revert: when true the position offsets are negated and the team
         value becomes ``1 - team``.
     :return: list of eight values; all zeros when ``building`` is ``None``.
     """
     if building is None:
         return list(np.zeros(8))

     hero_info = state_info.get_hero(hero_name)

     # Offsets from the querying hero, flipped for the reverted perspective.
     offset_x = building.pos.x - query_hero.pos.x
     offset_z = building.pos.z - query_hero.pos.z
     if revert:
         offset_x = -offset_x
         offset_z = -offset_z

     features = [
         self.normalize_value(offset_x),
         self.normalize_value(offset_z),
         self.normalize_value(building.att),
         self.normalize_value(7000),
         self.normalize_value(building.hp),
         self.normalize_value(
             StateUtil.cal_distance2(building.pos, hero_info.pos)),
         (1 - building.team) if revert else building.team,
     ]

     # Last feature: is the building currently attacking this hero?
     attack_info = state_info.if_unit_attack_hero(building.unit_name,
                                                  hero_name)
     features.append(0 if attack_info is None else 1)
     return features
Beispiel #13
0
 def units_in_tower_range(units, target_pos):
     """Count the units lying within tower attack range of ``target_pos``.

     :param units: iterable of units; each unit's ``pos`` is read.
     :param target_pos: reference position.
     :return: number of units whose ``StateUtil.cal_distance`` to
         ``target_pos`` does not exceed ``StateUtil.TOWER_ATTACK_RADIUS``.
     """
     return sum(
         1 for unit in units
         if StateUtil.cal_distance(unit.pos, target_pos)
         <= StateUtil.TOWER_ATTACK_RADIUS)
Beispiel #14
0
def cal_state_log_action_reward(state_path, output_path):
    """Recompute the rewards of a recorded state log and write the result.

    The log is split into battles: a new battle starts when a state's tick
    equals ``StateUtil.TICK_PER_STATE`` or when the tick does not increase.
    The state immediately preceding such a boundary is dropped, as before.
    The remaining states go through ``LineModel.update_rewards`` and are
    written one JSON document per line, prefixed with a UTC timestamp.

    Both files are opened with context managers so the handles are always
    released; the output file is created only once the rewards are ready.

    :param state_path: path of the input state log.
    :param output_path: path of the file to write.
    """
    with open(state_path, "r") as state_file:
        lines = state_file.readlines()

    state_logs = []
    prev_state = None
    for line in lines:
        cur_state = StateUtil.parse_state_log(line)
        starts_new_battle = (
            cur_state.tick == StateUtil.TICK_PER_STATE
            or (prev_state is not None
                and prev_state.tick >= cur_state.tick))
        if starts_new_battle:
            print("clear")
            prev_state = None
        # A state is only committed once its successor has been seen.
        if prev_state is not None:
            state_logs.append(prev_state)
        prev_state = cur_state

    if prev_state is not None:
        state_logs.append(prev_state)

    # Compute the reward values.
    state_logs_with_reward = LineModel.update_rewards(state_logs)

    # Record the result; closing the file flushes everything written.
    with open(output_path, 'w') as output:
        for state_with_reward in state_logs_with_reward:
            state_json = JSON.dumps(state_with_reward.encode())
            output.write(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " -- " + state_json + "\n")

    print(len(state_logs))
Beispiel #15
0
    def play_step(state_info, heros, hero_actions):
        """Simulate one step: hero actions first, then the units around them.

        For every hero in ``heros`` the nearby enemy units, the nearby
        friendly units and the nearest enemy tower each act at most once per
        step, after which the hero's buffs are updated.

        :param state_info: state to advance.
        :param heros: names of the heroes whose surroundings are simulated.
        :param hero_actions: actions chosen by the heroes for this step.
        :return: the state after the step.  Earlier versions returned
            ``None`` and dropped the state rebound from the helper calls.
        """
        # Play the hero actions.
        # NOTE(review): the result of play_hero_action is not used, unlike
        # every other play_* call here - confirm it mutates in place.
        for action in hero_actions:
            hero_info = state_info.get_hero(action.hero_name)
            PlayEngine.play_hero_action(state_info, action, hero_info)

        # Only units close to a hero are simulated, each at most once.
        played_units = set()
        for hero_name in heros:
            hero_info = state_info.get_hero(hero_name)
            hero_action = PlayEngine.find_hero_action(hero_actions, hero_name)
            near_enemy_units = StateUtil.get_nearby_enemy_units(
                state_info, hero_name, StateUtil.LINE_MODEL_RADIUS)
            near_friend_units = StateUtil.get_nearby_friend_units(
                state_info, hero_name, StateUtil.LINE_MODEL_RADIUS)

            # Soldiers: each side picks its targets among the other side.
            for acting_units, opposing_units in (
                    (near_enemy_units, near_friend_units),
                    (near_friend_units, near_enemy_units)):
                for unit in acting_units:
                    if unit.unit_name in played_units:
                        continue
                    played_units.add(unit.unit_name)
                    state_info = PlayEngine.play_unit_action(
                        state_info, unit, hero_info, hero_action,
                        opposing_units)

            # Tower, searched with a slightly larger radius.  The lookup is
            # keyed on the hero of THIS iteration; it used to read the stale
            # `action` variable left over from the loop above.
            nearest_enemy_tower = StateUtil.get_nearest_enemy_tower(
                state_info, hero_name, StateUtil.LINE_MODEL_RADIUS + 5)
            if (nearest_enemy_tower is not None
                    and nearest_enemy_tower.unit_name not in played_units):
                played_units.add(nearest_enemy_tower.unit_name)
                state_info = PlayEngine.play_unit_action(
                    state_info, nearest_enemy_tower, hero_info, hero_action,
                    near_friend_units)

            # Update the buff information.
            state_info = BuffInfo.update_unit_buffs(state_info, hero_name)

        return state_info
Beispiel #16
0
 def if_hit_by_tower(state_infos, state_idx, state_num, hero_name):
     """Tell whether a tower hit the hero in the states after ``state_idx``.

     Hits are reported with a delay, so the ``state_num`` states following
     ``state_idx`` are inspected rather than the state itself.

     :param state_infos: sequence of states.
     :param state_idx: index of the reference state.
     :param state_num: how many following states to inspect.
     :param hero_name: name of the hero.
     :return: ``True`` when any attacker reported by
         ``get_hero_be_attacked_info`` in those states is a tower.
     """
     for offset in range(1, state_num + 1):
         attackers = state_infos[state_idx + offset].get_hero_be_attacked_info(
             hero_name)
         if any(StateUtil.if_unit_tower(name) for name in attackers):
             return True
     return False
Beispiel #17
0
 def assert_tower_in_input(cur_state, hero_name, rival_hero):
     """Print whether the tower attacking the hero shows up in the model input.

     Nothing happens unless ``cur_state.if_tower_attack_hero`` reports an
     attack.  Otherwise the model input is generated and element 44 is
     compared with the normalized tower id; the outcome is printed together
     with input element 50 and the freshly computed tower-hero distance.

     :param cur_state: current state.
     :param hero_name: name of the hero under attack.
     :param rival_hero: rival hero name forwarded to ``gen_input``.
     :return: ``None``; this is a diagnostic helper.
     """
     att_info = cur_state.if_tower_attack_hero(hero_name)
     if att_info is None:
         return

     tower = str(att_info.atker)
     tower_info = cur_state.get_obj(tower)
     hero_info = cur_state.get_hero(hero_name)
     model_input = LineModel_PPO1.gen_input(cur_state, hero_name, rival_hero)

     expected_id = Line_Input_Lite.normalize_value_static(int(tower))
     if model_input[44] == expected_id:
         verdict = 'yes found attack tower in input'
     else:
         verdict = 'not found attack tower in input'
     print(verdict, tower, 'distance', model_input[50], 'cal_distance',
           StateUtil.cal_distance2(tower_info.pos, hero_info.pos))
Beispiel #18
0
 def choose_skill_target(selected,
                         state_info,
                         skill_info,
                         hero_name,
                         pos,
                         tgt_hero_name,
                         debug=False):
     """Resolve a skill target slot into a target id and position.

     Result convention, as produced below: a hero name plus its position for
     a valid target, ``-1`` for an impossible target (invisible, dead, or
     wrong kind of target), and ``0`` for a target out of cast range.  The
     position is ``None`` whenever the id is ``-1`` or ``0``.

     :param selected: target slot chosen by the model.
     :param state_info: current state.
     :param skill_info: skill config; ``cast_target`` and ``cast_distance``
         are read.
     :param hero_name: name of the casting hero.
     :param pos: position the cast distance is measured from.
     :param tgt_hero_name: name of the hero to target.
     :param debug: print the reason when a target is rejected.
     :return: ``(tgtid, tgtpos)``; the early rejection returns the same pair
         as a list.
     """
     hero_info = state_info.get_hero(hero_name)

     # Start from "no valid target".  Slots above 4 are not handled here and
     # used to fail with UnboundLocalError at the final return.
     tgtid = -1
     tgtpos = None

     if selected == 0:
         # Slot 0 casts on the hero itself; a self-only skill must not be
         # aimed at anybody else.
         if skill_info.cast_target == SkillTargetEnum.self and hero_name != str(
                 tgt_hero_name):
             if debug: print("施法目标为self,但是对象不是自己")
             return [-1, None]
         tgtid = hero_name
         # TODO: self-targeted skills may need to be told apart by purpose,
         # otherwise the fwd computation goes wrong.
         tgtpos = None
         # NOTE(review): slot 0 also satisfies the `<= 4` test below, so the
         # two assignments above are overwritten - confirm this is intended.
     if selected <= 4:
         # Aim at the given hero.
         tgt_hero = state_info.get_hero(tgt_hero_name)
         if tgt_hero.team != hero_info.team and not tgt_hero.is_enemy_visible(
         ):
             if debug: print("敌方英雄不可见")
             tgtid = -1
             tgtpos = None
         else:
             # Measured once and reused for the debug message.
             distance = StateUtil.cal_distance(tgt_hero.pos, pos)
             if distance > skill_info.cast_distance:
                 if debug:
                     print("技能攻击不到对方 %s %s %s" %
                           (tgt_hero_name, distance,
                            skill_info.cast_distance))
                 tgtid = 0
                 tgtpos = None
             # A dead hero is not a valid target.
             elif tgt_hero.hp <= 0:
                 if debug: print("技能攻击不了对方,对方已经死亡")
                 tgtid = -1
                 tgtpos = None
             else:
                 tgtid = tgt_hero_name
                 tgtpos = tgt_hero.pos
     return tgtid, tgtpos
Beispiel #19
0
def guess_action_cal_reward(state_path, output_path):
    """Guess the player's actions in a state log, then compute the rewards.

    The log is split into battles exactly like the other replay helpers: a
    tick equal to ``StateUtil.TICK_PER_STATE`` or a non-increasing tick
    starts a new battle and drops the state right before the boundary.  For
    every state that has a predecessor and a successor, the action of hero
    "27" against hero "28" is guessed when an enemy hero, an enemy lane unit
    or an enemy lane tower is nearby, and attached to the earlier state.
    The states then go through ``LineModel.update_rewards`` and are written
    one JSON document per line, prefixed with a UTC timestamp.

    Both files are opened with context managers so the handles are always
    released; the output file is created only once the rewards are ready.

    :param state_path: path of the input state log.
    :param output_path: path of the file to write.
    """
    with open(state_path, "r") as state_file:
        lines = state_file.readlines()

    state_logs = []
    prev_state = None
    for line in lines:
        cur_state = StateUtil.parse_state_log(line)
        starts_new_battle = (
            cur_state.tick == StateUtil.TICK_PER_STATE
            or (prev_state is not None
                and prev_state.tick >= cur_state.tick))
        if starts_new_battle:
            print("clear")
            prev_state = None
        # A state is only committed once its successor has been seen.
        if prev_state is not None:
            state_logs.append(prev_state)
        prev_state = cur_state

    if prev_state is not None:
        state_logs.append(prev_state)

    # Guess the player's behaviour over every window of three states.
    line_index = 1
    for prev_state, cur_state, next_state in zip(
            state_logs, state_logs[1:], state_logs[2:]):
        hero = prev_state.get_hero("27")
        near_enemy_heroes = StateUtil.get_nearby_enemy_heros(
            prev_state, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        near_enemy_units = StateUtil.get_nearby_enemy_units(
            prev_state, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        nearest_enemy_tower = StateUtil.get_nearest_enemy_tower(
            prev_state, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        near_enemy_units_in_line = StateUtil.get_units_in_line(
            near_enemy_units, line_index)
        # NOTE(review): nearest_enemy_tower may be None here; confirm that
        # get_units_in_line copes with a [None] list.
        nearest_enemy_tower_in_line = StateUtil.get_units_in_line(
            [nearest_enemy_tower], line_index)

        nothing_nearby = (len(near_enemy_heroes) == 0
                          and len(near_enemy_units_in_line) == 0
                          and len(nearest_enemy_tower_in_line) == 0)
        if nothing_nearby:
            continue

        player_action = Replayer.guess_player_action(
            prev_state, cur_state, next_state, "27", "28")
        action_str = StateUtil.build_command(player_action)
        print('玩家行为分析:' + str(action_str) + ' tick:' + str(prev_state.tick) + ' prev_pos: ' +
              hero.pos.to_string() + ', cur_pos: ' + cur_state.get_hero(hero.hero_name).pos.to_string())
        prev_state.add_action(player_action)

    # Compute the reward values.
    state_logs_with_reward = LineModel.update_rewards(state_logs)

    # Record the result; closing the file flushes everything written.
    with open(output_path, 'w') as output:
        for state_with_reward in state_logs_with_reward:
            state_json = JSON.dumps(state_with_reward.encode())
            output.write(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " -- " + state_json + "\n")

    print(len(state_logs))
Beispiel #20
0
def replay_battle_log(log_path, state_path, hero_names, model_path=None, save_model_path=None):
    """Replay a recorded battle log through the line model and dump the states.

    Each line of ``log_path`` is parsed with ``StateUtil.parse_state_log``,
    merged with the previous state through ``StateUtil.update_state_log`` and
    handed to ``LineTrainer.build_response``; the response is printed. Once
    the whole log has been replayed, every accumulated state is encoded,
    serialised as JSON and written to ``state_path``, one line per state,
    prefixed with the current UTC time.

    Args:
        log_path: Path of the battle log to read.
        state_path: Path of the output file; opened with mode ``'w'``.
        hero_names: Hero names passed to ``LineModel`` and ``LineTrainer``.
        model_path: Optional path handed to ``model.load``.
        save_model_path: Optional path handed to ``model.save``. The save
            happens right after the optional load, before any replaying.
    """
    # Read the whole log up front so the input handle is released immediately.
    with open(log_path, "r") as log_file:
        lines = log_file.readlines()

    # The context manager guarantees the output file is closed (and flushed)
    # even when parsing or the model raises midway through the replay.
    with open(state_path, 'w') as state_file:
        state_logs = []
        prev_state = None
        model = LineModel(279, 48, hero_names)
        if model_path is not None:
            model.load(model_path)
        if save_model_path is not None:
            model.save(save_model_path)

        line_trainer = LineTrainer(hero_names)
        for line in lines:
            cur_state = StateUtil.parse_state_log(line)
            # Drop the history when the tick equals TICK_PER_STATE or when the
            # tick does not advance past the previous state's tick.
            if cur_state.tick == StateUtil.TICK_PER_STATE or (
                    prev_state is not None and prev_state.tick >= cur_state.tick):
                print("clear")
                prev_state = None
            state_info = StateUtil.update_state_log(prev_state, cur_state)

            # Exercise the laning model on the merged state.
            rsp_str = line_trainer.build_response(state_info, prev_state, model, hero_names)
            print(rsp_str)
            prev_state = state_info
            state_logs.append(state_info)

        # Persist every replayed state to the output file.
        for state in state_logs:
            state_encode = state.encode()
            state_json = JSON.dumps(state_encode)
            state_file.write(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " -- " + state_json + "\n")
            state_file.flush()
    print(len(state_logs))
Beispiel #21
0
 def policy_move_retreat(hero_info):
     """Build a MOVE command that sends the hero 15 units along its retreat direction.

     Team 0 uses movement index 6, every other team uses movement index 0.
     The direction vector comes from ``StateUtil.mov`` and the target is the
     hero's current position shifted by 15 times that vector.
     """
     mov_idx = 6 if hero_info.team == 0 else 0
     direction = StateUtil.mov(mov_idx)
     destination = PosStateInfo(
         hero_info.pos.x + direction.x * 15,
         hero_info.pos.y + direction.y * 15,
         hero_info.pos.z + direction.z * 15)
     return CmdAction(hero_info.hero_name, CmdActionEnum.MOVE, None, None,
                      destination, None, None, mov_idx, None)
Beispiel #22
0
    def guess_hero_actions(self, state_index, real_heros=None):
        """Infer and record the actions of human-controlled heroes for one frame.

        The frame at ``state_index`` and its two neighbours are read from
        ``self.state_cache``. For every name in ``real_heros`` the action is
        guessed with ``Replayer.guess_player_action``, attached to the current
        state and printed. Nothing is guessed when ``real_heros`` is None.
        """
        cache = self.state_cache
        before = cache[state_index - 1]
        current = cache[state_index]
        after = cache[state_index + 1]

        # Only human players need their actions reconstructed from the states.
        if real_heros is not None:
            for name in real_heros:
                # NOTE(review): '28' is hard-coded as the last argument,
                # presumably the rival hero name; confirm against
                # Replayer.guess_player_action.
                guessed = Replayer.guess_player_action(before, current, after, name, '28')
                current.add_action(guessed)
                command = StateUtil.build_command(guessed)
                print('玩家行为分析:' + str(command) + ' tick:' + str(current.tick))
Beispiel #23
0
    def policy_attack_rival_unit(hero_info, rival_hero_info, state_info,
                                 hero_name, rival_near_units, rival_near_tower,
                                 near_friend_units):
        """Pick a creep-related action when it is safe to farm.

        The policy applies only when the rival hero is dead or at least
        ``LineTrainerPolicy.SAFE_RIVAL_HERO_DISTANCE`` away, no rival tower is
        nearby and at least one friendly unit is present. In priority order:

        1. attack the first rival unit whose hp is at most ``att - 20``;
        2. retreat if any rival unit is attacking the hero;
        3. attack the unit closest (along x) to the hero's own basement whose
           hp exceeds three times the hero's attack.

        Returns the chosen action, or None when the policy does not apply or
        no unit qualifies.
        """
        rival_hero_harmless = (
            rival_hero_info.hp <= 0
            or StateUtil.cal_distance(hero_info.pos, rival_hero_info.pos)
            >= LineTrainerPolicy.SAFE_RIVAL_HERO_DISTANCE)
        if (not rival_hero_harmless or rival_near_tower is not None
                or len(near_friend_units) == 0):
            return None

        # Last-hit first: take the first creep that is nearly dead.
        last_hit = next(
            (creep for creep in rival_near_units
             if creep.hp <= hero_info.att - 20),
            None)
        if last_hit is not None:
            action = LineTrainerPolicy.get_attack_unit_action(
                state_info, hero_name, last_hit.unit_name, 0)
            print("启动策略 如果附近没有敌方英雄,而且不在塔下,补兵 " + hero_name)
            return action

        # When a rival creep is attacking the hero, step back.
        under_creep_fire = any(
            state_info.if_unit_attack_hero(creep.unit_name, hero_name) is not None
            for creep in rival_near_units)
        if under_creep_fire:
            retreat = LineTrainerPolicy.policy_move_retreat(hero_info)
            print("启动策略 被小兵攻击的情况下后撤 " + hero_name)
            return retreat

        # Plain attack: skip low-hp creeps (kept for last-hitting) and prefer
        # the ones nearest to the own basement along the x axis.
        home = StateUtil.get_basement(hero_info)
        by_home_distance = sorted(
            rival_near_units,
            key=lambda creep: math.fabs(home.x - creep.pos.x))
        for creep in by_home_distance:
            if creep.hp > hero_info.att * 3:
                action = LineTrainerPolicy.get_attack_unit_action(
                    state_info, hero_name, creep.unit_name, 0)
                print("启动策略 如果附近没有敌方英雄,而且不在塔下,攻击敌方小兵 " + hero_name)
                return action
        return None
Beispiel #24
0
 def get_attack_unit_action(state_info, hero_name, unit_name, skill_id):
     """Build the command that targets the nearby enemy unit ``unit_name``.

     The unit's position in ``StateUtil.get_nearby_enemy_units`` determines
     the action index: ``position + 10 * skill_id + 10``. A ``ValueError`` is
     raised by ``list.index`` when the unit is not among the nearby units.
     """
     nearby = StateUtil.get_nearby_enemy_units(state_info, hero_name)
     names = [creep.unit_name for creep in nearby]
     position = names.index(unit_name)
     action_idx = position + 10 * skill_id + 10

     # NOTE(review): skill_id >= 1 yields an ATTACK command with skill 0 while
     # skill_id < 1 yields a CAST command; this looks inverted, confirm
     # against the callers before changing it.
     if skill_id < 1:
         target_pos = nearby[position].pos
         caster = state_info.get_hero(hero_name)
         facing = target_pos.fwd(caster.pos)
         return CmdAction(hero_name, CmdActionEnum.CAST, skill_id,
                          unit_name, target_pos, facing, None, action_idx, None)
     return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, unit_name,
                      None, None, None, action_idx, None)
Beispiel #25
0
 def use_skill3_correctly(state_info, hero_info, rival_hero_info,
                          rival_near_units, action_ratios):
     """Forbid skill 3 of hero config '101' when nothing is within its range.

     For heroes whose ``cfg_id`` is '101', when the rival hero is farther than
     ``LineTrainerPolicy.SKILL_RANGE_CHAERSI_SKILL3`` and
     ``LineTrainerPolicy.units_in_range`` reports 0 for the rival units,
     entries 38..47 of ``action_ratios`` are set to -1 in place. The (possibly
     modified) ``action_ratios`` is returned in every case.
     """
     if hero_info.cfg_id != '101':
         return action_ratios

     distance_to_rival = StateUtil.cal_distance(hero_info.pos,
                                                rival_hero_info.pos)
     if not distance_to_rival > LineTrainerPolicy.SKILL_RANGE_CHAERSI_SKILL3:
         return action_ratios

     units_hit = LineTrainerPolicy.units_in_range(
         rival_near_units, hero_info.pos,
         LineTrainerPolicy.SKILL_RANGE_CHAERSI_SKILL3)
     if units_hit == 0:
         print('策略选择', state_info.battleid, hero_info.hero_name,
               '查尔斯大招不应该在没人的时候使用')
         # Action slots 38..47 are the ones disabled for this skill.
         for slot in range(38, 48):
             action_ratios[slot] = -1
     return action_ratios
Beispiel #26
0
 def get_closest_fwd(fwd):
     """Return the predefined movement direction closest to ``fwd``.

     The eight directions ``StateUtil.mov(0)`` .. ``StateUtil.mov(7)`` are
     compared with ``fwd`` by the cosine of the angle between them in the
     x/z plane; the direction with the largest cosine wins.

     Args:
         fwd: Direction vector exposing ``x`` and ``z`` attributes.

     Returns:
         ``[direction, index]``. When ``fwd`` has zero length in the x/z
         plane no angle is defined, so ``[fwd, 0]`` is returned instead of
         raising ``ZeroDivisionError``.
     """
     best_cos = -1
     output_index = 0
     output_fwd = fwd

     # The norm of the query vector does not change between candidates.
     fwd_norm = math.sqrt(fwd.x * fwd.x + fwd.z * fwd.z)
     if fwd_norm == 0:
         return [output_fwd, output_index]

     for i in range(8):
         candidate = StateUtil.mov(i)
         candidate_norm = math.sqrt(candidate.x * candidate.x +
                                    candidate.z * candidate.z)
         if candidate_norm == 0:
             # A degenerate candidate cannot be compared by angle.
             continue
         cos = (fwd.x * candidate.x + fwd.z * candidate.z) / (fwd_norm *
                                                              candidate_norm)
         if cos > best_cos:
             best_cos = cos
             output_index = i
             output_fwd = candidate
     return [output_fwd, output_index]
Beispiel #27
0
    def gen_input_hero(self, hero, query_hero, rival_towers, revert=False):
        """Encode one hero as a flat list of model input features.

        A hero whose state is 'out' or whose hp is not positive is encoded as
        ``16 + 3 * 17`` zeros. Otherwise the result holds 16 hero features
        followed by the ``gen_input_skill`` encodings of skills 1, 2 and 3.

        Args:
            hero: Hero being encoded.
            query_hero: Hero whose position is the origin of the relative
                x/z offsets.
            rival_towers: Rival towers; the smallest
                ``StateUtil.cal_distance2`` to them is one feature, with
                10000 used when there are none.
            revert: When true, the relative offsets are negated and the team
                flag becomes ``1 - hero.team``.
        """
        if hero.state == 'out' or hero.hp <= 0:
            return list(np.zeros(16 + 3 * 17))

        # Distance to the closest rival tower; 10000 stands for "no tower".
        nearest_tower_dis = min(
            (StateUtil.cal_distance2(hero.pos, tower.pos)
             for tower in rival_towers),
            default=10000)

        offset_x = hero.pos.x - query_hero.pos.x
        offset_z = hero.pos.z - query_hero.pos.z
        if revert:
            offset_x = -offset_x
            offset_z = -offset_z
        team_flag = 1 - hero.team if revert else hero.team

        features = [
            self.normalize_value(offset_x),
            self.normalize_value(offset_z),
            self.normalize_value(hero.speed),
            self.normalize_value(hero.att),
            self.normalize_value(hero.attspeed),
            self.normalize_value(hero.attpen),
            self.normalize_value(hero.attpenrate),
            # TODO: the basic-attack reach (2) would go here; it only fits
            # heroes 1 and 2, other heroes may have a different reach.
            self.normalize_value(hero.hp),
            hero.hp / float(hero.maxhp),
            self.normalize_value(hero.hprec),
            self.normalize_value(hero.mp),
            self.normalize_value(hero.mag),
            self.normalize_value(hero.magpen),
            self.normalize_value(hero.magpenrate),
            self.normalize_value(nearest_tower_dis),
            team_flag,
        ]

        # Look up all three skill configs first, then encode them in order.
        skill_slots = (1, 2, 3)
        skill_configs = [SkillUtil.get_skill_info(hero.cfg_id, slot)
                         for slot in skill_slots]
        for slot, config in zip(skill_slots, skill_configs):
            features = features + self.gen_input_skill(config, hero.skills[slot])
        return features
Beispiel #28
0
 def keep_away_from(state_info, hero_info, rival_hero_info, action_ratios,
                    danger_pos, danger_radius):
     """Disable actions that would lead the hero into a danger circle.

     ``action_ratios`` is modified in place: every still-enabled action
     (value other than -1) that would move or attack into the circle of
     ``danger_radius`` around ``danger_pos`` is set to -1. Index layout as
     used here:

     * 0-7: movement along ``StateUtil.mov(index)``, checked half a unit ahead;
     * 8: attack the enemy tower, always disabled;
     * 9: attack the rival hero, disabled when that hero is inside the circle;
     * 10-17: attack nearby enemy unit ``index - 10``, disabled when the unit
       is inside the circle;
     * 28-37: only for hero config '101', a skill whose chosen target
       position lies inside the circle is disabled.

     Returns:
         The same ``action_ratios`` object. It is returned untouched when its
         maximum is already -1.
     """
     # Nothing left to veto when every action is already disabled.
     if max(action_ratios) == -1:
         return action_ratios

     for selected, ratio in enumerate(action_ratios):
         if ratio == -1:
             continue
         if selected < 8:
             fwd = StateUtil.mov(selected)
             tgtpos = PosStateInfo(hero_info.pos.x + fwd.x * 0.5,
                                   hero_info.pos.y + fwd.y * 0.5,
                                   hero_info.pos.z + fwd.z * 0.5)
             if StateUtil.cal_distance(tgtpos, danger_pos) <= danger_radius:
                 print('策略选择', state_info.battleid, hero_info.hero_name,
                       '移动方向会进入危险区域', hero_info.pos.to_string(),
                       tgtpos.to_string())
                 action_ratios[selected] = -1
         elif selected < 18:
             # Basic attacks on tower, rival hero and enemy units; this check
             # is aimed at melee heroes.
             if selected == 8:  # enemy tower
                 print('策略选择', state_info.battleid, hero_info.hero_name,
                       '不要去攻击塔')
                 action_ratios[selected] = -1
             elif selected == 9:  # rival hero
                 if StateUtil.cal_distance(rival_hero_info.pos,
                                           danger_pos) <= danger_radius:
                     print('策略选择', state_info.battleid, hero_info.hero_name,
                           '不要去近身攻击塔范围内的英雄')
                     action_ratios[selected] = -1
             else:  # enemy units
                 creeps = StateUtil.get_nearby_enemy_units(
                     state_info, hero_info.hero_name)
                 # NOTE(review): assumes slots for missing units were already
                 # set to -1 by the caller; otherwise this index can be out
                 # of range. Confirm against the caller.
                 tgt = creeps[selected - 10]
                 if StateUtil.cal_distance(tgt.pos,
                                           danger_pos) <= danger_radius:
                     print('策略选择', state_info.battleid, hero_info.hero_name,
                           '不要去近身攻击塔范围内的小兵')
                     action_ratios[selected] = -1
         elif hero_info.cfg_id == '101' and 28 <= selected < 38:
             # Special case for the jump skill of hero config '101'.
             skillid = int((selected - 18) / 10 + 1)
             [tgtid, tgtpos] = LineModel.choose_skill_target(
                 selected - 18 - (skillid - 1) * 10, state_info, skillid,
                 hero_info.hero_name, hero_info.pos,
                 rival_hero_info.hero_name)
             if tgtpos is not None:
                 if StateUtil.cal_distance(tgtpos,
                                           danger_pos) <= danger_radius:
                     print('策略选择', state_info.battleid, hero_info.hero_name,
                           '跳跃技能在朝着塔下的目标')
                     action_ratios[selected] = -1
     return action_ratios
Beispiel #29
0
    def search_team_battle_hero(self, state_info, hero):
        """Collect the heroes taking part in the team fight around ``hero``.

        Starting from ``hero``, the enemies reported by
        ``StateUtil.get_nearby_enemy_heros`` within
        ``TeamBattleTrainer.MODEL_RANGE`` are added to the group, and the
        search is repeated from every newly added hero until no unvisited
        hero remains. Only this single starting point is explored.

        Returns:
            A set holding ``hero`` and the ``hero_name`` of every hero
            reached by the search.
        """
        visited = set()
        participants = {hero}

        # Expand the group until every participant has been searched from.
        while len(visited) < len(participants):
            for name in participants - visited:
                enemies = StateUtil.get_nearby_enemy_heros(
                    state_info, name, TeamBattleTrainer.MODEL_RANGE)
                participants.update(enemy.hero_name for enemy in enemies)
                visited.add(name)

        return participants
Beispiel #30
0
 def gen_input_building(self,building, state_info=None, hero_name=None):
     """Encode a building as a 9-element list of model input features.

     A missing building (``None``) is encoded as nine zeros. Otherwise the
     list holds the normalized unit name, x, z, attack, the constant 7000,
     hp and the ``StateUtil.cal_distance2`` to the hero, followed by the
     raw team value and a 0/1 flag telling whether
     ``state_info.if_unit_attack_hero`` reports the building as attacking
     ``hero_name``.
     """
     if building is None:
         return list(np.zeros(9))

     hero_info = state_info.get_hero(hero_name)
     features = [
         self.normalize_value(int(building.unit_name)),
         self.normalize_value(building.pos.x),
         self.normalize_value(building.pos.z),
         self.normalize_value(building.att),
         self.normalize_value(7000),
         self.normalize_value(building.hp),
         self.normalize_value(StateUtil.cal_distance2(building.pos, hero_info.pos)),
         building.team,
     ]
     # Flag whether the building is currently attacking this hero.
     attack_info = state_info.if_unit_attack_hero(building.unit_name, hero_name)
     features.append(0 if attack_info is None else 1)
     return features