Ejemplo n.º 1
0
 def __init__(self, alpha, epsilon, gamma,
              output_dim, hidden_units, input_dim,
              num_actions):
     """Store the agent settings and build the network kept in ``self.q``.

     Args:
         alpha: Stored unchanged as ``self.alpha`` (presumably the learning
             rate -- confirm against the methods that read it).
         epsilon: Stored unchanged as ``self.epsilon`` (presumably the
             exploration rate -- confirm).
         gamma: Stored unchanged as ``self.gamma`` (presumably the discount
             factor -- confirm).
         output_dim: First positional argument to ``nn.SingleLayerNN``.
         hidden_units: Second positional argument to ``nn.SingleLayerNN``.
         input_dim: Third positional argument to ``nn.SingleLayerNN``.
         num_actions: Stored unchanged as ``self.num_actions``.
     """
     # Scalar settings are kept exactly as supplied.
     self.alpha, self.epsilon, self.gamma = alpha, epsilon, gamma

     # No state has been observed yet: a two-slot array of None placeholders
     # and action index 0 act as the initial bookkeeping values.
     self.curr_state = np.array([None] * 2)
     self.curr_action = 0

     # The network receives a linear activation followed by a sigmoid one;
     # which layer each applies to is decided by nn.SingleLayerNN
     # (NOTE(review): confirm the argument order against its signature).
     network = nn.SingleLayerNN(
         output_dim,
         hidden_units,
         input_dim,
         nn.LinearActivation(),
         nn.SigmoidActivation(),
     )
     self.q = network

     self.num_actions = num_actions
Ejemplo n.º 2
0
 def __init__(self, policy_units, value_units, state_vector_len,
              num_actions, actor_stepsize, critic_stepsize,
              lambda_pi, lambda_v, gamma,
              cov=1, clip_grads=False, normalize=False):
     """Initialise the agent by delegating to ``NeuralACAgent.__init__``.

     Every argument is forwarded positionally to the parent initializer.
     This wrapper only fixes the activation objects, a literal ``1`` and
     the duplication of ``cov``.

     Args:
         policy_units: Forwarded unchanged (first parent argument).
         value_units: Forwarded unchanged (second parent argument).
         state_vector_len: Forwarded unchanged.
         num_actions: Forwarded unchanged.
         actor_stepsize: Forwarded unchanged.
         critic_stepsize: Forwarded unchanged.
         lambda_pi: Forwarded unchanged.
         lambda_v: Forwarded unchanged.
         gamma: Forwarded unchanged.
         cov: Forwarded twice in consecutive positions; defaults to 1.
         clip_grads: Forwarded unchanged; defaults to False.
         normalize: Forwarded unchanged; defaults to False.
     """
     parent_init = NeuralACAgent.__init__

     # Two linear/sigmoid pairs, each a freshly constructed object
     # (presumably one pair per network, policy then value -- confirm
     # against the NeuralACAgent signature).
     first_linear = nn.LinearActivation()
     first_sigmoid = nn.SigmoidActivation()
     second_linear = nn.LinearActivation()
     second_sigmoid = nn.SigmoidActivation()

     # The literal 1 sits between state_vector_len and num_actions
     # (presumably the action vector length -- confirm).
     fixed_length = 1

     parent_init(
         self,
         policy_units, value_units,
         first_linear, first_sigmoid,
         second_linear, second_sigmoid,
         state_vector_len, fixed_length, num_actions,
         actor_stepsize, critic_stepsize,
         lambda_pi, lambda_v, gamma,
         cov, cov,  # the same value fills both covariance slots
         clip_grads, normalize,
     )
Ejemplo n.º 3
0
    def __init__(self, num_target_policy_hidden_units, state_vector_len,
                 num_actions, actor_stepsize, critic_stepsize,
                 lambda_pi, lambda_v, gamma,
                 cov=1.0, clip_grads=False, clip_rho=True):
        """Initialise the agent via ``NNPolicyLinearOPACAgent.__init__``.

        Every argument is forwarded positionally to the parent initializer.
        This wrapper only fixes the two activation objects and an action
        vector length of 1.

        Args:
            num_target_policy_hidden_units: Forwarded unchanged (first
                parent argument).
            state_vector_len: Forwarded unchanged.
            num_actions: Forwarded unchanged.
            actor_stepsize: Forwarded unchanged.
            critic_stepsize: Forwarded unchanged.
            lambda_pi: Forwarded unchanged.
            lambda_v: Forwarded unchanged.
            gamma: Forwarded unchanged.
            cov: Forwarded unchanged; defaults to 1.0.
            clip_grads: Forwarded unchanged; defaults to False.
            clip_rho: Forwarded unchanged; defaults to True.
        """
        parent_init = NNPolicyLinearOPACAgent.__init__

        # A linear activation followed by a sigmoid one; how the parent
        # uses each is not visible here (confirm against its signature).
        linear_activation = nn.LinearActivation()
        sigmoid_activation = nn.SigmoidActivation()

        # This agent always uses an action vector of length 1.
        action_vector_len = 1

        parent_init(
            self,
            num_target_policy_hidden_units,
            linear_activation, sigmoid_activation,
            state_vector_len, action_vector_len, num_actions,
            actor_stepsize, critic_stepsize,
            lambda_pi, lambda_v, gamma,
            cov, clip_grads, clip_rho,
        )