예제 #1
0
 def __init__(self, state_dim, action_dim, hdim):
     """Create the ``Q_head`` MLP.

     Args:
         state_dim: Added to ``action_dim`` to form the first entry of the
             ``dims`` list handed to ``MLP``.
         action_dim: Added to ``state_dim`` (see above).
         hdim: Iterable unpacked into ``dims`` between the first entry and
             the trailing ``1`` (presumably hidden-layer widths -- confirm
             against ``MLP``).
     """
     super().__init__()
     input_width = state_dim + action_dim
     layer_dims = [input_width, *hdim, 1]
     self.Q_head = MLP(dims=layer_dims)
예제 #2
0
 def __init__(self, state_dim, hdim):
     """Create the ``value_head`` MLP.

     Args:
         state_dim: First entry of the ``dims`` list handed to ``MLP``.
         hdim: Iterable unpacked into ``dims`` between ``state_dim`` and the
             trailing ``1`` (presumably hidden-layer widths -- confirm
             against ``MLP``).
     """
     super().__init__()
     layer_dims = [state_dim, *hdim, 1]
     self.value_head = MLP(dims=layer_dims)
예제 #3
0
 def __init__(self, state_dim, hdim, action_dim):
     """Create the encoder/decoder pair and mark the policy as non-discrete.

     Args:
         state_dim: First entry of the ``dims`` list handed to the encoder
             ``MLP``.
         hdim: Indexable sequence; all entries follow ``state_dim`` in the
             encoder's ``dims`` and its last entry is the first argument of
             ``BetaReluParams``.
         action_dim: Second argument of ``BetaReluParams``.
     """
     super().__init__()
     encoder_dims = [state_dim, *hdim]
     self.encoder = MLP(dims=encoder_dims)
     # The decoder is built after the encoder, keyed on the last hdim entry.
     last_hidden = hdim[-1]
     self.decoder = BetaReluParams(last_hidden, action_dim)
     self.discrete = False
예제 #4
0
 def __init__(self, state_dim, hdim, action_dim):
     """Create the encoder/decoder pair.

     Args:
         state_dim: First entry of the ``dims`` list handed to the encoder
             ``MLP``.
         hdim: Indexable sequence; all entries follow ``state_dim`` in the
             encoder's ``dims`` and its last entry is the first argument of
             ``BetaSoftPlusParams``.
         action_dim: Second argument of ``BetaSoftPlusParams``.
     """
     # NOTE(review): no ``discrete`` attribute is assigned in this
     # constructor -- confirm whether callers expect one.
     super().__init__()
     encoder_dims = [state_dim, *hdim]
     self.encoder = MLP(dims=encoder_dims)
     last_hidden = hdim[-1]
     self.decoder = BetaSoftPlusParams(last_hidden, action_dim)
예제 #5
0
 def __init__(self, state_dim, action_dim):
     """Create a ``CNN`` encoder and an ``MLP`` decoder sized from it.

     Args:
         state_dim: Iterable unpacked as the positional arguments of
             ``CNN``.
         action_dim: Last entry of the decoder's ``dims`` list.
     """
     super().__init__()
     self.encoder = CNN(*state_dim)
     # The decoder's first dim is read back from the freshly built encoder.
     embedding_size = self.encoder.image_embedding_size
     decoder_dims = [embedding_size, action_dim]
     self.decoder = MLP(dims=decoder_dims)
예제 #6
0
 def __init__(self, state_dim, hdim, action_dim):
     """Create the ``bid_mu`` and ``bid_logstd`` MLPs and set ``discrete``.

     Both MLPs receive an equal ``dims`` list (``state_dim``, the unpacked
     ``hdim`` entries, then ``action_dim``) and ``zero_init=True``.

     Args:
         state_dim: First entry of each ``dims`` list.
         hdim: Iterable unpacked into the middle of each ``dims`` list.
         action_dim: Last entry of each ``dims`` list.
     """
     super().__init__()
     # Each head gets its own freshly built list rather than a shared one.
     mu_dims = [state_dim, *hdim, action_dim]
     self.bid_mu = MLP(dims=mu_dims, zero_init=True)
     logstd_dims = [state_dim, *hdim, action_dim]
     self.bid_logstd = MLP(dims=logstd_dims, zero_init=True)
     self.discrete = False
예제 #7
0
 def __init__(self, state_dim, hdim, action_dim):
     """Create the single ``network`` MLP.

     Args:
         state_dim: First entry of the ``dims`` list handed to ``MLP``.
         hdim: Iterable unpacked into ``dims`` between ``state_dim`` and
             ``action_dim``.
         action_dim: Last entry of the ``dims`` list.
     """
     super().__init__()
     layer_dims = [state_dim, *hdim, action_dim]
     self.network = MLP(dims=layer_dims)