def ctf_adv(self, info, device):
    """Return the policy-weighted counterfactual advantage for one unit.

    For every action available to the unit's type, a one-hot action code is
    written into the last 7 channels of the observation at the unit's cell,
    the critic is evaluated on the whole batch of counterfactual
    observations, and the result is sum_a pi(a|s) * V(s_a) as a float.

    Arguments:
        info   -- tuple (obs, (unit, act), start_at, ev_s); only obs and
                  unit are used here, the rest is unpacked and ignored
        device -- torch device the forward passes run on

    Returns:
        float -- policy-weighted sum of counterfactual critic values
    """
    obs_t, ua, _start_at, _ev_s = info
    obs_t = torch.from_numpy(obs_t).unsqueeze(0).float().to(device)
    unit, _act = ua
    u_f = torch.from_numpy(
        unit_feature_encoder(unit, self.map_size)
    ).unsqueeze(0).float().to(device)
    # No gradients are needed anywhere: the result is detached to a float.
    with torch.no_grad():
        _v, p, _ = self.brain.forward(obs_t, u_f, unit.type)
        u_x, u_y = unit.x, unit.y
        # One one-hot row per member of this unit type's action enum,
        # padded/truncated to the 7 action channels of the observation.
        n_actions = len(AGENT_ACTIONS_MAP[unit.type].__members__)
        act_list = list(torch.eye(n_actions, 7))
        ctf_obs_li = []
        for one_hot in act_list:
            # Overwrite the action channels at the unit's cell, then
            # snapshot the tensor (clone, not deepcopy, for torch tensors).
            obs_t[0, -7:, u_x, u_y] = one_hot
            ctf_obs_li.append(obs_t.clone())
        ctf_v = self.brain.critic_forward(torch.cat(ctf_obs_li))
        # Expectation of the counterfactual values under the policy.
        adv = (p.squeeze() * ctf_v.squeeze()).sum()
    return float(adv)
def _encode_sample(self, idxes):
    """Encode the stored transitions at *idxes* into network-ready batches.

    Arguments:
        idxes -- iterable of indices into self._storage

    Returns:
        states {np.array} -- encoded game states (state_encoder output)
        unit_types {np.array} -- type id of each sampled unit
        units {np.array} -- per-unit feature vectors
        actions {np.array} -- translated network actions
    """
    states, unit_types, units, actions = [], [], [], []
    for i in idxes:
        data = self._storage[i]
        state, t_player, unit, action = (
            data.gs, data.t_player, data.unit, data.action)
        states.append(state_encoder(gs=state, player=t_player))
        units.append(
            unit_feature_encoder(unit, (state.pgs.height, state.pgs.width)))
        actions.append(game_action_translator(unit, action))
        unit_types.append(unit.type)
    return (np.array(states), np.array(unit_types),
            np.array(units), np.array(actions))
def _encode_sample(self, idxes):
    """Encode sampled replay transitions into batched arrays.

    Arguments:
        idxes -- iterable of indices into self._storage

    Returns (all np.array, batch-ordered):
        unit_types  -- type id of the acting unit
        states      -- stored observations
        units       -- unit features hstacked with the unit-type table row
        actions     -- network actions taken
        next_states -- successor observations
        rewards     -- per-transition rewards
        hxses       -- recurrent hidden states
        done_masks  -- episode-termination flags
    """
    unit_types, states, units, actions = [], [], [], []
    next_states, rewards, hxses, done_masks = [], [], [], []
    for i in idxes:
        transition = self._storage[i]
        # NOTE(review): relies on transition.__dict__ preserving field
        # insertion order as (state, unit_action, next_state, reward,
        # hxs, done) — fragile; prefer named attribute access if the
        # transition type's field names can be confirmed.
        state, unit_action, next_state, reward, hxs, done = (
            transition.__dict__.values())
        map_size = state.shape[-2:]
        u, a = unit_action
        unit_types.append(u.type)
        states.append(state)
        units.append(
            np.hstack((unit_feature_encoder(u, map_size),
                       encoded_utt_dict[u.type])))
        actions.append(a)
        next_states.append(next_state)
        hxses.append(hxs)
        rewards.append(reward)
        done_masks.append(done)
    return (np.array(unit_types),
            np.array(states),
            np.array(units),
            np.array(actions),
            np.array(next_states),
            np.array(rewards),
            np.array(hxses),
            np.array(done_masks))