Example #1
0
 def __init__(self, ndim_obs, n_actions, hidden_sizes=(50, 50, 50)):
     """Create the softmax policy and the value network, then register them.

     Args:
         ndim_obs: Forwarded as the first argument of both MLPs
             (presumably the observation dimensionality).
         n_actions: Forwarded as the second argument of the policy MLP
             (presumably the number of discrete actions).
         hidden_sizes: Forwarded unchanged to both MLPs.
     """
     # Policy: a tanh MLP wrapped in a SoftmaxPolicy.
     policy_mlp = links.MLP(
         ndim_obs, n_actions, hidden_sizes, nonlinearity=F.tanh)
     self.pi = policies.SoftmaxPolicy(model=policy_mlp)

     # Value: a second tanh MLP on the same input, with 1 as its second
     # argument (presumably a scalar output).
     value_mlp = links.MLP(
         ndim_obs, 1, hidden_sizes=hidden_sizes, nonlinearity=F.tanh)
     self.v = value_mlp

     # Both members are handed positionally to the parent initializer.
     super().__init__(self.pi, self.v)
Example #2
0
 def __init__(self, gpu=False):
     """Build the three sub-models and pass them to the parent initializer.

     Args:
         gpu: When truthy, ``to_gpu(0)`` is called on every sub-model
             before the parent initializer runs.
     """
     policy_model = QFunction()
     self.q_func = policies.SoftmaxPolicy(model=policy_model)
     self.v_func = VFunction()
     # NOTE(review): SharedFunctionCNN() appears to be an alternative to
     # this class; confirm before swapping.
     self.common = SharedFunctionSFCNN()

     if gpu:
         # Same order as the attributes were created above.
         for sub_model in (self.q_func, self.v_func, self.common):
             sub_model.to_gpu(0)

     super().__init__(self.common, self.q_func, self.v_func)
Example #3
0
 def __init__(self, ndim_obs, n_actions, hidden_sizes=(64, 64)):
     """Create the softmax policy and the value network, then register them.

     Args:
         ndim_obs: Forwarded as the first argument of both MLPs
             (presumably the observation dimensionality).
         n_actions: Forwarded as the second argument of the policy MLP
             (presumably the number of discrete actions).
         hidden_sizes: Forwarded unchanged to both MLPs.
     """
     # Policy: an MLP wrapped in a SoftmaxPolicy.
     policy_mlp = links.MLP(ndim_obs, n_actions, hidden_sizes)
     self.pi = policies.SoftmaxPolicy(model=policy_mlp)

     # Value: a second MLP on the same input, with 1 as its second
     # argument (presumably a scalar output).
     value_mlp = links.MLP(ndim_obs, 1, hidden_sizes=hidden_sizes)
     self.v = value_mlp

     # Both members are handed positionally to the parent initializer.
     super().__init__(self.pi, self.v)