Beispiel #1
0
    def model_forward(self, obs, batch_opt=False):
        """Convert every entry of ``obs`` to a float tensor and run the model.

        ``obs`` is a dict whose values must be numpy arrays with the batch
        dimension first. ``batch_opt`` is accepted but not used by this
        variant. Returns whatever ``self.model.forward`` returns.
        """
        # Even with a single environment the returned state is already
        # 1x2048, so no unsqueeze is needed here.
        # Work on a shallow copy so the caller's obs (which is also stored
        # elsewhere) keeps its original values.
        tensors = obs.copy()
        for key, value in obs.items():
            tensors[key] = toFloatTensor(value, self.gpu_id)
        return self.model.forward(tensors)
Beispiel #2
0
 def model_forward(self, obs, batch_opt=False):
     """Convert ``obs`` to float tensors and run the model.

     ``obs`` is a dict whose values must be numpy arrays. When ``batch_opt``
     is false, a leading dimension of size 1 is added in place to each
     converted tensor before the forward pass. Returns whatever
     ``self.model.forward`` returns.
     """
     # TODO: needs unsqueeze, or re-wrap the single-environment case.
     # Shallow copy: obs is also stored by the caller and must not be changed.
     batched = obs.copy()
     for key, value in obs.items():
         tensor = toFloatTensor(value, self.gpu_id)
         if not batch_opt:
             tensor.unsqueeze_(0)
         batched[key] = tensor
     return self.model.forward(batched)
Beispiel #3
0
    def model_forward(self, obs, batch_opt = False, params = None):
        """Run the model on ``obs`` and attach policy statistics to the output.

        Each value of ``obs`` is converted with ``toFloatTensor``; when
        ``batch_opt`` is false a leading dimension of size 1 is added in
        place. ``self.hidden`` and ``self.probs`` are passed along under the
        keys ``'hidden'`` and ``'action_probs'`` without conversion, and
        ``params`` is forwarded to ``self.model.forward`` unchanged.

        Returns the model output with three extra entries derived from
        ``out['policy']`` along dim 1: ``'prob'`` (softmax), ``'log_prob'``
        (log-softmax) and ``'entropy'``.
        """
        inputs = obs.copy()
        for key, value in obs.items():
            tensor = toFloatTensor(value, self.gpu_id)
            if not batch_opt:
                tensor.unsqueeze_(0)
            inputs[key] = tensor

        # Recurrent state and previous action probabilities are taken from
        # the instance as-is.
        inputs['hidden'] = self.hidden
        inputs['action_probs'] = self.probs

        out = self.model.forward(inputs, params)

        logits = out['policy']
        prob = F.softmax(logits, dim = 1)
        out['prob'] = prob
        log_prob = F.log_softmax(logits, dim = 1)
        out['log_prob'] = log_prob
        # Entropy: sum over dim 1 of -log_prob * prob.
        out['entropy'] = (-log_prob * prob).sum(1)
        return out
Beispiel #4
0
    def model_forward(self, obs, batch_opt=False):
        """Run the model on ``obs`` together with stored recurrent history.

        ``obs`` is a dict whose values must be numpy arrays with the batch
        dimension first. With ``batch_opt`` set, all but the last entry of
        ``self.hidden_batch[0]``, ``self.hidden_batch[1]`` and
        ``self.probs_batch`` are concatenated with ``torch.cat``; otherwise
        only the last entry of each is used. Every entry of the resulting
        input dict, including ``'hidden'`` and ``'action_probs'``, is then
        passed through ``toFloatTensor``. Returns whatever
        ``self.model.forward`` returns.
        """
        # Even with a single environment the returned state is already
        # 1x2048, so no unsqueeze is needed here.
        # Shallow copy: obs is also stored by the caller and must not change.
        model_input = obs.copy()

        first_states = self.hidden_batch[0]
        second_states = self.hidden_batch[1]
        if not batch_opt:
            hidden = (first_states[-1], second_states[-1])
            action_probs = self.probs_batch[-1]
        else:
            hidden = (torch.cat(first_states[:-1]),
                      torch.cat(second_states[:-1]))
            action_probs = torch.cat(self.probs_batch[:-1])
        model_input['hidden'] = hidden
        model_input['action_probs'] = action_probs

        # NOTE(review): 'hidden' is a tuple here; presumably toFloatTensor
        # handles tuples -- confirm against its definition.
        for key, value in model_input.items():
            model_input[key] = toFloatTensor(value, self.gpu_id)

        return self.model.forward(model_input)
Beispiel #5
0
    def get_pi_v(self, env_state):
        """Run the model on ``env_state`` plus the stored target representation.

        ``env_state`` is a mapping of input name to value. Its first value is
        inspected: if it is a list, its length is taken as the number of
        states and every array in ``self.target_reper`` is repeated that many
        times along a new leading axis; otherwise a single state is assumed
        and a deep copy of ``self.target_reper`` is used.

        Every input is converted with ``toFloatTensor``, squeezed in place
        and, when there is exactly one state, given a leading dimension of
        size 1 again.

        Returns:
            The tuple ``(out['policy'], out['value'])`` from the model output.
        """
        # Only the first entry is probed to decide between single and
        # multiple states.
        first_value = env_state[list(env_state)[0]]
        if isinstance(first_value, list):
            num_s = len(first_value)
            # repeat() already allocates fresh arrays, so no deep copy of
            # self.target_reper is needed on this path.
            target_reper = {
                k: np.expand_dims(v, 0).repeat(num_s, 0)
                for k, v in self.target_reper.items()
            }
        else:
            num_s = 1
            # Copy so the stored target representation is never handed out.
            target_reper = copy.deepcopy(self.target_reper)

        model_input = {}
        model_input.update(env_state)
        model_input.update(target_reper)
        for k in model_input:
            model_input[k] = toFloatTensor(model_input[k], self.gpu_id)
            # NOTE(review): squeeze_() without a dim drops every size-1
            # dimension, not only a leading one -- confirm that no input has
            # a legitimate size-1 feature axis.
            model_input[k].squeeze_()
            if num_s == 1:
                model_input[k].unsqueeze_(0)
        out = self.model.forward(model_input)

        return out['policy'], out['value']
Beispiel #6
0
    def model_forward(self, obs, batch_opt=False):
        """Run the model on ``obs`` together with a slice of stored history.

        Each value of ``obs`` is converted with ``toFloatTensor``; when
        ``batch_opt`` is false a leading dimension of size 1 is added in
        place. ``'hidden'`` and ``'action_probs'`` are then filled from
        ``self.hidden_batch`` and ``self.probs_batch`` without conversion:
        everything but the last entry (``[:-1]``) when ``batch_opt`` is set,
        otherwise only the last entry kept as a length-1 slice (``[-1:]``).
        Returns whatever ``self.model.forward`` returns.
        """
        model_input = obs.copy()
        for key, value in obs.items():
            tensor = toFloatTensor(value, self.gpu_id)
            if not batch_opt:
                tensor.unsqueeze_(0)
            model_input[key] = tensor

        # slice(None, -1) is [:-1]; slice(-1, None) is [-1:].
        window = slice(None, -1) if batch_opt else slice(-1, None)
        model_input['hidden'] = (
            self.hidden_batch[0][window],
            self.hidden_batch[1][window],
        )
        model_input['action_probs'] = self.probs_batch[window]

        return self.model.forward(model_input)