def __init__(self, alpha, epsilon, gamma, output_dim, hidden_units, input_dim, num_actions):
    """Set up the agent's hyperparameters and its Q-value network.

    Args:
        alpha: step-size (learning rate) for updates.
        epsilon: exploration rate (presumably for epsilon-greedy action
            selection — confirm against the acting code).
        gamma: discount factor.
        output_dim: output dimension of the Q network.
        hidden_units: number of hidden units in the single hidden layer.
        input_dim: input dimension of the Q network.
        num_actions: size of the discrete action set.
    """
    # Learning hyperparameters.
    self.alpha = alpha
    self.epsilon = epsilon
    self.gamma = gamma

    # Bookkeeping for the most recent transition; state starts as a
    # two-element placeholder of Nones, action defaults to 0.
    self.curr_state = np.array([None, None])
    self.curr_action = 0

    # Single-hidden-layer network: sigmoid hidden activation, linear output.
    self.q = nn.SingleLayerNN(
        output_dim,
        hidden_units,
        input_dim,
        nn.LinearActivation(),
        nn.SigmoidActivation(),
    )
    self.num_actions = num_actions
def __init__(self, policy_units, value_units, state_vector_len, num_actions,
             actor_stepsize, critic_stepsize, lambda_pi, lambda_v, gamma,
             cov=1, clip_grads=False, normalize=False):
    """Construct the agent by delegating to ``NeuralACAgent.__init__``.

    Fixes the architectural choices for the parent: both policy and value
    networks use a linear output activation with a sigmoid hidden
    activation, the action vector length is 1, and the same ``cov`` value
    is passed for both covariance slots the parent expects.

    Args:
        policy_units: hidden units in the policy (actor) network.
        value_units: hidden units in the value (critic) network.
        state_vector_len: length of the state feature vector.
        num_actions: size of the action space.
        actor_stepsize: actor learning rate.
        critic_stepsize: critic learning rate.
        lambda_pi: eligibility-trace decay for the actor.
        lambda_v: eligibility-trace decay for the critic.
        gamma: discount factor.
        cov: covariance parameter, forwarded twice to the parent
            (presumably initial/fixed policy covariance — confirm there).
        clip_grads: whether the parent should clip gradients.
        normalize: whether the parent should normalize (inputs or
            gradients — determined by the parent class).
    """
    # Activation pairs for policy and value nets: (output, hidden).
    policy_out_act = nn.LinearActivation()
    policy_hidden_act = nn.SigmoidActivation()
    value_out_act = nn.LinearActivation()
    value_hidden_act = nn.SigmoidActivation()

    NeuralACAgent.__init__(
        self,
        policy_units, value_units,
        policy_out_act, policy_hidden_act,
        value_out_act, value_hidden_act,
        state_vector_len,
        1,  # action vector length is fixed at 1 for this agent
        num_actions,
        actor_stepsize, critic_stepsize,
        lambda_pi, lambda_v,
        gamma,
        cov, cov,  # same covariance forwarded for both parent slots
        clip_grads, normalize,
    )
def __init__(self, num_target_policy_hidden_units, state_vector_len, num_actions,
             actor_stepsize, critic_stepsize, lambda_pi, lambda_v, gamma,
             cov=1.0, clip_grads=False, clip_rho=True):
    """Construct the agent by delegating to ``NNPolicyLinearOPACAgent.__init__``.

    Pins the target-policy network's activations (linear output, sigmoid
    hidden) and fixes the action vector length to 1; all remaining
    hyperparameters are forwarded unchanged to the parent.

    Args:
        num_target_policy_hidden_units: hidden units in the target-policy net.
        state_vector_len: length of the state feature vector.
        num_actions: size of the action space.
        actor_stepsize: actor learning rate.
        critic_stepsize: critic learning rate.
        lambda_pi: eligibility-trace decay for the actor.
        lambda_v: eligibility-trace decay for the critic.
        gamma: discount factor.
        cov: covariance parameter forwarded to the parent.
        clip_grads: whether the parent should clip gradients.
        clip_rho: whether the parent should clip importance-sampling
            ratios (presumably rho = pi/mu — confirm in the parent class).
    """
    NNPolicyLinearOPACAgent.__init__(
        self,
        num_target_policy_hidden_units,
        nn.LinearActivation(),   # output activation
        nn.SigmoidActivation(),  # hidden activation
        state_vector_len,
        1,  # action vector length fixed at 1
        num_actions,
        actor_stepsize, critic_stepsize,
        lambda_pi, lambda_v,
        gamma,
        cov,
        clip_grads, clip_rho,
    )