def calc_pdparam(self, x, evaluate=True, net=None):
    '''
    Forward-pass the net to obtain the probability-distribution parameters:
    logits for a discrete dist., or the mean and std for a continuous dist.
    '''
    if net is None:
        net = self.net
    if evaluate:
        pdparam = net.wrap_eval(x)
    else:
        net.train()
        pdparam = net(x)
    logger.debug(f'pdparam: {pdparam}')
    return pdparam
def calc_pdparam(self, x, evaluate=True, net=None):
    '''
    Run a forward pass of the appropriate net to get the pdparam used for
    action-policy sampling. The pdparam is the logits for a discrete prob.
    dist., or the mean and std for a continuous prob. dist.
    '''
    if net is None:
        net = self.net
    if evaluate:
        pdparam = net.wrap_eval(x)
    else:
        net.train()
        pdparam = net(x)
    logger.debug(f'pdparam: {pdparam}')
    return pdparam
def calc_v(self, x, evaluate=True, net=None):
    '''
    Forward-pass to calculate the predicted state-value from critic.

    @param x: input state (batch) fed to the network
    @param evaluate: if True use net.wrap_eval (eval-mode forward); otherwise
        switch the net to train mode and do a regular forward pass
    @param net: optional override network. NOTE(review): only honored on the
        shared path — the separate-critic branch always uses self.critic and
        silently ignores a passed net; confirm this is intended.
    @returns v: tensor of predicted state-values
    '''
    net = self.net if net is None else net
    if self.shared:  # shared net output: (policy, ..., value)
        if evaluate:
            out = net.wrap_eval(x)
        else:
            net.train()
            out = net(x)
        # non-in-place squeeze: squeeze_ would mutate the net's output tensor,
        # which is unsafe if that tensor is cached or reused elsewhere
        v = out[-1].squeeze(dim=1)  # get value only
    else:
        if evaluate:
            out = self.critic.wrap_eval(x)
        else:
            self.critic.train()
            out = self.critic(x)
        v = out.squeeze(dim=1)
    logger.debug(f'v: {v}')
    return v
def calc_pdparam(self, x, evaluate=True, net=None):
    '''
    Forward-pass to obtain the pdparam: logits for a discrete prob. dist.,
    or the mean and std for a continuous prob. dist. With a shared
    architecture, the critic tail is stripped from the net output first.
    '''
    if net is None:
        net = self.net
    if evaluate:
        pdparam = net.wrap_eval(x)
    else:
        net.train()
        pdparam = net(x)
    if self.share_architecture:
        # MLPHeterogenousTails: keep only the actor outputs at the front.
        # Discrete always has a single head; continuous with len == 2 is
        # only (loc, scale) and (v), so both cases take the first element.
        if self.body.is_discrete or len(pdparam) == 2:
            pdparam = pdparam[0]
        else:
            pdparam = pdparam[:-1]
    logger.debug(f'pdparam: {pdparam}')
    return pdparam
def calc_v(self, x, evaluate=True, net=None):
    '''
    Forward-pass to calculate the predicted state-value from critic.

    @param x: input state (batch) fed to the network
    @param evaluate: if True use net.wrap_eval (eval-mode forward); otherwise
        switch the net to train mode and do a regular forward pass
    @param net: optional override network. NOTE(review): only honored on the
        share_architecture path — the separate-critic branch always uses
        self.critic and silently ignores a passed net; confirm this is intended.
    @returns v: tensor of predicted state-values
    '''
    net = self.net if net is None else net
    if self.share_architecture:
        if evaluate:
            out = net.wrap_eval(x)
        else:
            net.train()
            out = net(x)
        # MLPHeterogenousTails, get last. Non-in-place squeeze: squeeze_ would
        # mutate the net's output tensor, which is unsafe if it is cached or
        # reused elsewhere.
        v = out[-1].squeeze(dim=1)
    else:
        if evaluate:
            out = self.critic.wrap_eval(x)
        else:
            self.critic.train()
            out = self.critic(x)
        v = out.squeeze(dim=1)
    logger.debug(f'v: {v}')
    return v