Beispiel #1
0
 async def _world_state_from_reader(cls, reader, team_id):
     # Receive the package length.
     data = await reader.read(cls.WORLDSTATE_PAYLOAD_BYTES)
     if len(data) != cls.WORLDSTATE_PAYLOAD_BYTES:
         # raise ValueError('Invalid worldstate payload')
         return None
     n_bytes = unpack("@I", data)[0]
     # Receive the payload given the length.
     data = await asyncio.wait_for(reader.read(n_bytes), timeout=2)
     # Decode the payload.
     world_state = CMsgBotWorldState()
     world_state.ParseFromString(data)
     logger.debug('Received world_state: dotatime={}, gamestate={}, team={}'.format(
         world_state.dota_time, world_state.game_state, team_id))
     return world_state
Beispiel #2
0
async def start(host='127.0.0.1', port=30001):
    """Connect to `host:port` and print every length-prefixed world state.

    Each message is expected to be a 4-byte native unsigned int holding the
    payload size, followed by that many bytes of a serialized
    `CMsgBotWorldState`. The loop ends when the peer closes the connection.

    Args:
        host: address to connect to.
        port: TCP port to connect to.

    Raises:
        asyncio.TimeoutError: a payload did not arrive within 2 seconds.
    """
    reader, writer = await asyncio.open_connection(host, port)
    print('reader=', reader)
    print('writer=', writer)
    try:
        while True:
            # `readexactly` waits for the whole header; it only fails once the
            # stream has ended, which is the signal to stop instead of spinning
            # on empty reads.
            try:
                data = await reader.readexactly(4)
            except asyncio.IncompleteReadError:
                break
            print('data=', data)
            n_bytes = unpack("@I", data)[0]
            print('n_bytes=', n_bytes)
            # Receive the payload given the length.
            try:
                data = await asyncio.wait_for(reader.readexactly(n_bytes),
                                              timeout=2)
            except asyncio.IncompleteReadError:
                # Connection closed in the middle of a message.
                break
            print(data)
            world_state = CMsgBotWorldState()
            world_state.ParseFromString(data)
            print(world_state.dota_time)
    finally:
        # Release the socket no matter how the loop ended.
        writer.close()
Beispiel #3
0
    def action_to_pb(self, action_dict, state, unit_handles):
        """Convert a policy action dictionary into a protobuf action.

        Args:
            action_dict: mapping with an 'enum' entry selecting the action
                kind (0: none, 1: move, 2: attack, 3: cast without target).
                Moves also read 'x' and 'y' (indices into
                `Policy.MOVE_ENUMS`), attacks optionally read 'target_unit'
                (index into `unit_handles`), casts read 'ability'.
            state: world state in which this player's hero unit is looked up.
            unit_handles: indexable collection of unit handles.

        Returns:
            A `CMsgBotWorldState.Action` addressed to `self.player_id`.

        Raises:
            ValueError: `action_dict['enum']` is not one of the known kinds.
        """
        # TODO(tzaman): Decrease the scope of this function. Make it a converter only.
        hero_unit = get_unit(state, player_id=self.player_id)
        action_pb = CMsgBotWorldState.Action()
        action_pb.actionDelay = 0  # action_dict['delay'] * DELAY_ENUM_TO_STEP
        action_enum = action_dict['enum']

        if action_enum == 0:
            action_pb.actionType = CMsgBotWorldState.Action.Type.Value(
                'DOTA_UNIT_ORDER_NONE')
        elif action_enum == 1:
            action_pb.actionType = CMsgBotWorldState.Action.Type.Value(
                'DOTA_UNIT_ORDER_MOVE_DIRECTLY')
            m = CMsgBotWorldState.Action.MoveToLocation()
            # The move target is an offset relative to the hero's position.
            hero_location = hero_unit.location
            m.location.x = hero_location.x + Policy.MOVE_ENUMS[
                action_dict['x']]
            m.location.y = hero_location.y + Policy.MOVE_ENUMS[
                action_dict['y']]
            m.location.z = 0
            action_pb.moveDirectly.CopyFrom(m)
        elif action_enum == 2:
            action_pb.actionType = CMsgBotWorldState.Action.Type.Value(
                'DOTA_UNIT_ORDER_ATTACK_TARGET')
            m = CMsgBotWorldState.Action.AttackTarget()
            if 'target_unit' in action_dict:
                m.target = unit_handles[action_dict['target_unit']]
            else:
                # No target selected: -1 is sent as the handle.
                m.target = -1
            m.once = True
            action_pb.attackTarget.CopyFrom(m)
        elif action_enum == 3:
            # Reuse the message created above so this branch keeps the same
            # actionDelay as every other branch instead of discarding it.
            action_pb.actionType = CMsgBotWorldState.Action.Type.Value(
                'DOTA_UNIT_ORDER_CAST_NO_TARGET')
            action_pb.cast.abilitySlot = action_dict['ability']
        else:
            raise ValueError("unknown action {}".format(action_enum))
        action_pb.player = self.player_id
        return action_pb
Beispiel #4
0
 def train_ability(self, hero_unit):
     """Build a train-ability order when the hero has gained a level.

     Compares `hero_unit.level` with `self.prev_level`. When the level went
     up, `self.prev_level` is updated and an order to train
     "nevermore_shadowraze1" for `self.player_id` is returned.

     Returns:
         A `CMsgBotWorldState.Action`, or None when no level was gained.
     """
     # Nothing to do unless we leveled up.
     if not hero_unit.level > self.prev_level:
         return None

     self.prev_level = hero_unit.level
     # Just try to level up the first ability.
     order_type = CMsgBotWorldState.Action.Type.Value(
         'DOTA_UNIT_ORDER_TRAIN_ABILITY')
     train_pb = CMsgBotWorldState.Action()
     train_pb.actionType = order_type
     train_pb.player = self.player_id
     train_pb.trainAbility.ability = "nevermore_shadowraze1"
     return train_pb
Beispiel #5
0
    async def play(self, config, game_id):
        """Play one game and let every controlled player process the result.

        The service is reset with `config`, a `Player` is created for every
        bot slot that is requested as controlled, and then both teams are
        observed and acted for in turn until `self.max_dota_time` is reached
        or an observation comes back with a non-OK status.

        Args:
            config: game configuration passed to `self.dota_service.reset`;
                its `hero_picks` are matched against the returned players.
            game_id: identifier handed to every created `Player`.

        Returns:
            None. Games that end in RESOURCE_EXHAUSTED, FAILED_PRECONDITION
            or OUT_OF_RANGE return early without finishing the players.
        """
        logger.info('Starting game.')

        # Use the latest weights by default.
        use_latest_weights = {TEAM_RADIANT: True, TEAM_DIRE: True}
        if random.random() > self.latest_weights_prob:
            # Randomly pick the team that will use the old weights.
            old_model_team = random.choice([TEAM_RADIANT, TEAM_DIRE])
            use_latest_weights[old_model_team] = False

        drawing = Drawing(
        )  # TODO(tzaman): drawing should include what's visible to the player

        # Reset and obtain the initial observation. This dictates who we are controlling,
        # this is done before the player definition, because there might be humans playing
        # that take up bot positions.
        response = await asyncio.wait_for(self.dota_service.reset(config),
                                          timeout=120)

        player_request = config.hero_picks
        players_response = response.players  # Lists all human and bot players.
        players = {TEAM_RADIANT: [], TEAM_DIRE: []}
        for p_req, p_res in zip(player_request, players_response):
            # Requested and returned slots are paired by position, so they
            # must at least agree on the team.
            assert p_req.team_id == p_res.team_id  # TODO(tzaman): more tests?
            if p_res.is_bot and p_req.control_mode == HERO_CONTROL_MODE_CONTROLLED:
                player = Player(
                    game_id=game_id,
                    player_id=p_res.id,
                    team_id=p_res.team_id,
                    hero=p_res.hero,
                    experience_channel=self.experience_channel,
                    use_latest_weights=use_latest_weights[p_res.team_id],
                    drawing=drawing,
                    validation=self.validation,
                )
                players[p_res.team_id].append(player)

        prev_obs = {
            TEAM_RADIANT: response.world_state_radiant,
            TEAM_DIRE: response.world_state_dire,
        }
        done = False
        dota_time = -float('Inf')
        end_state = None
        while dota_time < self.max_dota_time:
            reward_sum_step = {TEAM_RADIANT: 0, TEAM_DIRE: 0}
            for team_id in [TEAM_RADIANT, TEAM_DIRE]:
                logger.debug('\ndota_time={:.2f}, team={}'.format(
                    dota_time, team_id))

                response = await self.dota_service.observe(
                    ObserveConfig(team_id=team_id))
                if response.status != Status.Value('OK'):
                    end_state = response.status
                    done = True
                    break
                obs = response.world_state
                dota_time = obs.dota_time

                # We now loop over each player in this team and get each player's action.
                actions = []
                for player in players[team_id]:
                    player.compute_reward(prev_obs=prev_obs[team_id], obs=obs)
                    reward_sum_step[team_id] += sum(
                        player.rewards[-1].values())
                    with torch.no_grad():
                        actions_player = player.obs_to_actions(obs=obs)
                    actions.extend(actions_player)

                actions_pb = CMsgBotWorldState.Actions(actions=actions)
                actions_pb.dota_time = obs.dota_time

                _ = await self.dota_service.act(
                    Actions(actions=actions_pb, team_id=team_id))

                prev_obs[team_id] = obs

            # NOTE(review): this block also runs for the step in which an
            # observation failed, so `rewards[-1]` may then belong to an
            # earlier step -- confirm that is intended.
            if not self.validation:
                # Subtract each other's rewards.
                for team_id in [TEAM_RADIANT, TEAM_DIRE]:
                    for player in players[team_id]:
                        player.rewards[-1]['enemy'] = -reward_sum_step[
                            OPPOSITE_TEAM[team_id]]

                for player in [*players[TEAM_RADIANT], *players[TEAM_DIRE]]:
                    if player.steps_queued > 0 and player.steps_queued % self.rollout_size == 0:
                        await player.rollout()

            if done:
                break

        if end_state in [
                Status.Value('RESOURCE_EXHAUSTED'),
                Status.Value('FAILED_PRECONDITION'),
                Status.Value('OUT_OF_RANGE')
        ]:
            # Bad end state. We don't want to roll this one out.
            logger.warning(
                'Bad end state `{}`, not rolling out game (dota_time={})'.
                format(end_state, dota_time))
            return
        # drawing.save(stem=game_id)  # HACK

        # Finish (e.g. final rollout or send validation metrics).
        for player in [*players[TEAM_RADIANT], *players[TEAM_DIRE]]:
            player.process_endstate(end_state)
            await player.finish()

        # TODO(tzaman): the worldstate ends when game is over. the worldstate doesn't have info
        # about who won the game: so we need to get info from that somehow

        logger.info('Game finished.')
Beispiel #6
0
    async def run_a_game(self):
        """Play one episode against `self.env` and then call `self.train()`.

        Each tick builds one feature row per unit within 1200 distance of the
        hero (scaled position plus a 6-way one-hot unit category), feeds the
        stacked rows to `self.a3c_model`, records probabilities, values and
        entropies, and sends a move order derived from the chosen action. The
        episode stops once `dota_time` exceeds 130.

        `config` is not a parameter; it is resolved from the enclosing scope.

        Returns:
            None. Returns early, without training, when clearing or resetting
            the environment fails.

        Raises:
            Exception: whatever `self.env.step` raises (including timeouts) is
                printed and re-raised.
        """
        self.reset()
        self.MODE = np.random.randint(2)

        # Start a game; give up on this episode if the environment cannot be
        # cleared and reset.
        try:
            await asyncio.wait_for(self.env.clear(Empty()), timeout=60)
            state = await asyncio.wait_for(self.env.reset(config),
                                           timeout=60)
        except Exception as e:
            print('Exception on env.reset: {}'.format(e))
            return

        while True:
            # fetch hero
            state = state.world_state
            if state.dota_time > 130:
                break
            prev_state = state
            hero = get_hero_unit(state)

            all_units = DotaServiceEnv.get_unit_list(state)

            input_state = []

            hero_loc = (hero.location.x, hero.location.y)

            for unit in all_units:
                loc = [unit.location.x, unit.location.y]
                d = math.sqrt((unit.location.x - hero_loc[0])**2 +
                              (unit.location.y - hero_loc[1])**2)

                # Ignore everything that is too far away from the hero.
                if d >= 1200:
                    continue

                # The hero keeps its absolute position; every other unit is
                # described by its offset from the hero.
                if unit is not hero:
                    loc = [
                        hero_loc[0] - unit.location.x,
                        hero_loc[1] - unit.location.y
                    ]

                loc = [loc[0] / 7000., loc[1] / 7000.]

                unit_state = list(loc)
                # One-hot unit category: player 0's hero, own creep, enemy
                # creep, own tower, enemy tower, anything else.
                type_tup = [0] * 6
                if unit.unit_type == CMsgBotWorldState.UnitType.Value(
                        'HERO') and unit.player_id == 0:
                    type_tup[0] = 1
                elif unit.unit_type == CMsgBotWorldState.UnitType.Value(
                        'LANE_CREEP') and unit.team_id == hero.team_id:
                    type_tup[1] = 1
                elif unit.unit_type == CMsgBotWorldState.UnitType.Value(
                        'LANE_CREEP') and unit.team_id != hero.team_id:
                    type_tup[2] = 1
                elif unit.unit_type == CMsgBotWorldState.UnitType.Value(
                        'TOWER') and unit.team_id == hero.team_id:
                    type_tup[3] = 1
                elif unit.unit_type == CMsgBotWorldState.UnitType.Value(
                        'TOWER') and unit.team_id != hero.team_id:
                    type_tup[4] = 1
                else:
                    type_tup[5] = 1
                unit_state.extend(type_tup)

                unit_state = np.array(unit_state)
                unit_state = torch.from_numpy(unit_state).float()
                if unit is hero:
                    hero_state = unit_state
                else:
                    input_state.append(unit_state)

            # Hero row first, then the other units ordered by distance.
            input_state_wo_hero = sorted(input_state,
                                         key=lambda x: math.hypot(x[0], x[1]))
            input_state = [hero_state]
            input_state.extend(input_state_wo_hero)

            raw_input_state = input_state

            input_state = torch.stack(input_state)

            self.states.append(input_state)

            action_out, value_out = self.a3c_model(input_state)

            # The entropy below sums over dim 1 and `multinomial` accepts at
            # most 2-D input, so `action_out` is 2-D here; dim=1 is what the
            # deprecated implicit choice resolved to for 2-D tensors.
            prob = F.softmax(action_out, dim=1)
            self.raw_probs.append(prob)
            log_prob = F.log_softmax(action_out, dim=1)
            self.raw_log_probs.append(log_prob)

            self.values.append(value_out)
            self.log_probs.append(log_prob)

            entropy = -(log_prob * prob).sum(1, keepdim=True)
            self.entropies.append(entropy)

            if self.MODE == MODE_NORMAL:
                action = prob.multinomial(num_samples=1).data
            elif self.MODE == MODE_AUTO:
                action = self.default_action(raw_input_state)
            self.actions.append(action)

            action_pb = CMsgBotWorldState.Action()
            action_pb.actionType = CMsgBotWorldState.Action.Type.Value(
                'DOTA_UNIT_ORDER_MOVE_TO_POSITION')
            mx, my = get_moving_vec(action)
            scale = 500
            hloc = hero.location
            m = CMsgBotWorldState.Action.MoveToLocation()
            m.location.x = mx * scale + hloc.x
            m.location.y = my * scale + hloc.y
            m.location.z = 0
            action_pb.moveToLocation.CopyFrom(m)
            action_pb.actionDelay = 0

            try:
                state = await asyncio.wait_for(self.env.step(
                    Action(action=action_pb)),
                                               timeout=11)
                reward = calc_reward(state.world_state, prev_state)
                self.rewards.append(reward)

            except Exception as e:
                print('Exception on env.step: {}'.format(repr(e)))
                raise

        self.train()
Beispiel #7
0
    13: 7640,
    14: 8865,
    15: 10115,
    16: 11390,
    17: 12690,
    18: 14015,
    19: 15415,
    20: 16905,
    21: 18405,
    22: 20155,
    23: 22155,
    24: 24405,
    25: 26905
}

# Reusable action message whose type is DOTA_UNIT_ORDER_NONE.
action_none = CMsgBotWorldState.Action(
    actionType=CMsgBotWorldState.Action.Type.Value('DOTA_UNIT_ORDER_NONE'))


def get_total_xp(level, xp_needed_to_level):
    """Return the total experience for `level` plus progress into the next.

    Args:
        level: current level, used as a key into `xp_to_reach_level`.
        xp_needed_to_level: experience still missing to reach `level + 1`.

    Returns:
        `xp_to_reach_level[level]` plus the part of the next level's
        requirement that is no longer missing. At level 25 the table value is
        returned unchanged.
    """
    if level == 25:
        return xp_to_reach_level[level]
    next_total = xp_to_reach_level[level + 1]
    current_total = xp_to_reach_level[level]
    level_span = next_total - current_total
    earned_in_level = level_span - xp_needed_to_level
    return xp_to_reach_level[level] + earned_in_level


def get_reward(prev_state, state):
async def main():
    """Drive a GUI-hosted game through the DotaService with fixed move orders.

    Connects to the service at 192.168.1.17:13337, resets a game with ten
    Pudge picks (only the first Radiant pick is controlled), sends a warm-up
    move order for 15 act/observe rounds, and then keeps sending a move order
    for player 0 while observing the Radiant team.
    """
    move_order = CMsgBotWorldState.Action.Type.DOTA_UNIT_ORDER_MOVE_TO_POSITION

    warmup_action = Actions(
        actions=CMsgBotWorldState.Actions(actions=[
            CMsgBotWorldState.Action(
                actionType=move_order,
                moveToLocation=CMsgBotWorldState.Action.MoveToLocation(
                    # units=[0],
                    location=CMsgBotWorldState.Vector(x=-394, y=-486, z=204)))
        ]),
        team_id=Team.TEAM_RADIANT)
    # Connect to the DotaService.
    env = DotaServiceStub(Channel('192.168.1.17', 13337))

    def _pudge(team, mode):
        # Every slot picks the same hero; only team and control mode vary.
        return HeroPick(team_id=team,
                        hero_id=Hero.NPC_DOTA_HERO_PUDGE,
                        control_mode=mode)

    default_mode = HeroControlMode.HERO_CONTROL_MODE_DEFAULT
    hero_picks = [
        _pudge(Team.TEAM_RADIANT,
               HeroControlMode.HERO_CONTROL_MODE_CONTROLLED)
    ]
    hero_picks += [_pudge(Team.TEAM_RADIANT, default_mode) for _ in range(4)]
    hero_picks += [_pudge(Team.TEAM_DIRE, default_mode) for _ in range(5)]

    # Get the initial observation.
    observation = await env.reset(
        GameConfig(host_mode=HostMode.HOST_MODE_GUI,
                   hero_picks=hero_picks,
                   ticks_per_observation=30))

    radiant_view = Team.TEAM_RADIANT
    for _ in range(15):
        # Take an action, then fetch the resulting observation.
        await env.act(warmup_action)
        observation = await env.observe(ObserveConfig(team_id=radiant_view))
        print(".", end="")

    print()
    move_pb = CMsgBotWorldState.Action(
        actionDelay=0,
        actionType=move_order,
        moveToLocation=CMsgBotWorldState.Action.MoveToLocation(
            # units=[1],  # TODO: Should really get unit ID from worldstate
            location=CMsgBotWorldState.Vector(x=-394, y=-486, z=0)),
        player=0)
    move_action = Actions(
        actions=CMsgBotWorldState.Actions(
            # dota_time=observation.world_state.dota_time,
            actions=[move_pb]),
        team_id=Team.TEAM_RADIANT)
    print(f"moving {move_action}")
    while True:
        # Take an action, then fetch the resulting observation.
        await env.act(move_action)
        observation = await env.observe(
            ObserveConfig(team_id=Team.TEAM_RADIANT))