def __init__(self, ndim_obs, n_actions, hidden_sizes=(50, 50, 50)):
    """Assemble a softmax policy and a value network from tanh MLPs.

    Args:
        ndim_obs: First positional argument of both MLPs (presumably
            the observation dimensionality -- confirm against
            ``links.MLP``).
        n_actions: Second positional argument of the policy MLP
            (presumably the number of discrete actions -- confirm).
        hidden_sizes: Hidden-layer sizes handed to both MLPs.
    """
    activation = F.tanh

    # The policy MLP is built before the value MLP; keep that order in
    # case parameter initialisation depends on construction order.
    policy_net = links.MLP(
        ndim_obs, n_actions, hidden_sizes, nonlinearity=activation)
    self.pi = policies.SoftmaxPolicy(model=policy_net)

    # The value network emits a single output.
    self.v = links.MLP(
        ndim_obs, 1, hidden_sizes=hidden_sizes, nonlinearity=activation)

    # Both sub-models are handed to the parent initializer, policy first.
    super().__init__(self.pi, self.v)
def __init__(self, gpu=False):
    """Create the shared trunk, the softmax policy head and the value head.

    Args:
        gpu: When truthy, every sub-model is moved with ``to_gpu(0)``;
            the device index is hard-coded to 0.
    """
    policy_model = QFunction()
    self.q_func = policies.SoftmaxPolicy(model=policy_model)
    self.v_func = VFunction()
    # NOTE: SharedFunctionCNN was left as a commented-out alternative here.
    self.common = SharedFunctionSFCNN()

    if gpu:
        # Same transfer order as before: policy, value, shared trunk.
        for component in (self.q_func, self.v_func, self.common):
            component.to_gpu(0)

    # The parent initializer receives the shared trunk first, then the heads.
    super().__init__(self.common, self.q_func, self.v_func)
def __init__(self, ndim_obs, n_actions, hidden_sizes=(64, 64)):
    """Assemble a softmax policy and a value network from MLPs.

    No nonlinearity is passed, so both MLPs use whatever default
    ``links.MLP`` defines.

    Args:
        ndim_obs: First positional argument of both MLPs (presumably
            the observation dimensionality -- confirm against
            ``links.MLP``).
        n_actions: Second positional argument of the policy MLP
            (presumably the number of discrete actions -- confirm).
        hidden_sizes: Hidden-layer sizes handed to both MLPs.
    """
    # The policy MLP is built before the value MLP; keep that order in
    # case parameter initialisation depends on construction order.
    policy_net = links.MLP(ndim_obs, n_actions, hidden_sizes)
    self.pi = policies.SoftmaxPolicy(model=policy_net)

    # The value network emits a single output.
    value_net = links.MLP(ndim_obs, 1, hidden_sizes=hidden_sizes)
    self.v = value_net

    # Both sub-models are handed to the parent initializer, policy first.
    super().__init__(self.pi, self.v)