Example 1
File: agents.py Project: fzwqq/JRTS
    def ctf_adv(self, info, device):
        """Counterfactual baseline: policy-weighted expected critic value
        over every action the unit could have taken at its cell."""
        obs_t, ua, start_at, ev_s = info  # start_at and ev_s are unused here
        obs_t = torch.from_numpy(obs_t).unsqueeze(0).float().to(device)
        unit, act = ua
        u_f = torch.from_numpy(unit_feature_encoder(unit, self.map_size)).unsqueeze(0).float().to(device)

        with torch.no_grad():
            v, p, _ = self.brain.forward(obs_t, u_f, unit.type)

            u_x, u_y = unit.x, unit.y
            # One 7-dim one-hot row per action available to this unit type.
            n_acts = len(AGENT_ACTIONS_MAP[unit.type].__members__)
            act_list = list(torch.eye(n_acts, 7))
            ctf_obs_li = []
            for x in act_list:
                # Overwrite the 7 action channels at the unit's cell with
                # this candidate action, then snapshot the observation.
                obs_t[0, -7:, u_x, u_y] = x
                ctf_obs_li.append(obs_t.clone())

            ctf_v = self.brain.critic_forward(torch.cat(ctf_obs_li))

            # Expected value over counterfactual actions, weighted by the policy.
            adv = (p.squeeze() * ctf_v.squeeze()).sum()

        return float(adv)
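
The loop above evaluates the critic once per candidate action by swapping one-hot action encodings into the observation's last seven channels and batching the results. Below is a minimal, self-contained sketch of that pattern with a stand-in critic; the shapes, n_actions, and dummy_critic are illustrative assumptions, not taken from the JRTS code.

import torch

# Illustrative shapes; only the substitution pattern mirrors ctf_adv above.
n_actions, act_channels, h, w = 4, 7, 8, 8
obs = torch.zeros(1, act_channels, h, w)
u_x, u_y = 3, 3
policy = torch.softmax(torch.randn(n_actions), dim=0)

def dummy_critic(batch):
    # Stand-in for brain.critic_forward: one scalar value per observation.
    return batch.sum(dim=(1, 2, 3))

ctf_obs = []
for one_hot in torch.eye(n_actions, act_channels):
    # Overwrite the action channels at the unit's cell, then snapshot.
    obs[0, -act_channels:, u_x, u_y] = one_hot
    ctf_obs.append(obs.clone())

values = dummy_critic(torch.cat(ctf_obs))   # shape (n_actions,)
baseline = (policy * values).sum()          # policy-weighted expectation
print(float(baseline))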
Example 2
    def _encode_sample(self, idxes):
        """Encode the transitions at `idxes` into parallel numpy batches."""
        states, unit_types, units, actions = [], [], [], []
        for i in idxes:
            data = self._storage[i]
            state, t_player, unit, action = data.gs, data.t_player, data.unit, data.action
            states.append(state_encoder(gs=state, player=t_player))
            units.append(
                unit_feature_encoder(unit,
                                     (state.pgs.height, state.pgs.width)))
            actions.append(game_action_translator(unit, action))
            unit_types.append(unit.type)

        return (np.array(states), np.array(unit_types),
                np.array(units), np.array(actions))
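
The method follows the usual replay-buffer encoding pattern: gather one field per transition into parallel Python lists, then stack each list into a single ndarray. A runnable sketch of just that pattern; the Transition tuple and its contents are hypothetical stand-ins for the stored objects, though the field names mirror the attributes read above.

import numpy as np
from collections import namedtuple

# Hypothetical storage layout standing in for self._storage entries.
Transition = namedtuple("Transition", "gs t_player unit action")
storage = [Transition(gs=np.zeros((3, 4, 4)), t_player=0,
                      unit=np.ones(5), action=i % 3) for i in range(8)]

idxes = [0, 3, 5]
states  = np.array([storage[i].gs for i in idxes])      # shape (3, 3, 4, 4)
units   = np.array([storage[i].unit for i in idxes])    # shape (3, 5)
actions = np.array([storage[i].action for i in idxes])  # shape (3,)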
Example 3
    def _encode_sample(self, idxes):
        """Encode the transitions at `idxes` into parallel numpy batches.

        Arguments:
            idxes {list[int]} -- indices into the replay storage

        Returns: Batch arrays of
            unit_types {np.array} -- type id of the acting unit
            states {np.array} -- encoded observations
            units {np.array} -- unit features including utt for ac network input
            actions {np.array} -- network actions taken in `states`
            next_states {np.array} -- encoded successor observations
            rewards {np.array} -- scalar rewards
            hxses {np.array} -- recurrent hidden states
            done_masks {np.array} -- episode-termination masks
        """
        unit_types, states, units, actions = [], [], [], []
        next_states, rewards, hxses, done_masks = [], [], [], []

        for i in idxes:
            transition = self._storage[i]

            # Relies on attribute insertion order (stable since Python 3.7).
            state, unit_action, next_state, reward, hxs, done = \
                transition.__dict__.values()
            map_size = state.shape[-2:]

            u, a = unit_action

            unit_types.append(u.type)
            states.append(state)
            # Unit features concatenated with the encoded unit-type table entry.
            units.append(
                np.hstack((unit_feature_encoder(u, map_size),
                           encoded_utt_dict[u.type])))
            actions.append(a)
            next_states.append(next_state)
            hxses.append(hxs)
            rewards.append(reward)
            done_masks.append(done)

        return (np.array(unit_types),
                np.array(states),
                np.array(units),
                np.array(actions),
                np.array(next_states),
                np.array(rewards),
                np.array(hxses),
                np.array(done_masks))
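
One subtlety worth noting: unpacking transition.__dict__.values() only works because instance attributes are stored in insertion order (guaranteed for dicts since Python 3.7), so the assignment order in the transition class must match the unpack order. A minimal demonstration with a hypothetical Transition class (the real class in JRTS is not shown here):

class Transition:
    # The attribute assignment order below fixes the order in which
    # __dict__.values() yields fields, and hence the unpack order
    # used in _encode_sample.
    def __init__(self, state, unit_action, next_state, reward, hxs, done):
        self.state = state
        self.unit_action = unit_action
        self.next_state = next_state
        self.reward = reward
        self.hxs = hxs
        self.done = done

t = Transition("s", ("u", 2), "s2", 1.0, None, False)
state, unit_action, next_state, reward, hxs, done = t.__dict__.values()
assert (state, unit_action, done) == ("s", ("u", 2), False)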