def __init__(
    self,
    s_dim,
    a_dim,
    h_dim,
    h_act=nn.ReLU,
    buffer_size=100000,
    batch_size=32,
    lr=1e-4,
    gamma=0.95,
    theta=0.01,
    noise_level=None,
    n_sample=5,
    *args,
    **kwargs
):
    """Create the online/target ``BayesNet`` pair, replay buffer and optimizer.

    Args:
        s_dim: Passed to ``BayesNet`` as ``in_dim``.
        a_dim: Passed to ``BayesNet`` as ``o_dim``; also stored on ``self.a_dim``.
        h_dim: Passed to ``BayesNet`` as ``h_dim``.
        h_act: Passed to ``BayesNet`` as ``h_act`` (defaults to ``nn.ReLU``).
        buffer_size: Sole argument given to ``ReplayBuffer``.
        batch_size: Stored on ``self.batch_size``.
        lr: Learning rate handed to ``Adam``.
        gamma: Stored on ``self.gamma`` (presumably the discount factor --
            confirm against the training code).
        theta: Stored on ``self.theta`` (presumably the target-update
            coefficient -- confirm against the training code).
        noise_level: Passed to ``BayesNet`` and stored on ``self.noise_level``.
        n_sample: Stored on ``self.n_sample``.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.
    """
    super(B3DQNAgent, self).__init__()

    # Both networks share one configuration; the online net is built first.
    net_config = dict(
        in_dim=s_dim,
        o_dim=a_dim,
        h_dim=h_dim,
        h_act=h_act,
        noise_level=noise_level,
    )
    online_net = BayesNet(**net_config)
    frozen_net = BayesNet(**net_config)
    # Start the target network from the online network's weights.
    frozen_net.load_state_dict(online_net.state_dict())
    self.q_net = online_net
    self.target_net = frozen_net

    self.buffer = ReplayBuffer(buffer_size)
    # Only the online network's parameters are given to the optimizer.
    self.optimizer = Adam(online_net.parameters(), lr=lr)

    # Plain hyper-parameters kept for later use by the agent.
    self.batch_size = batch_size
    self.gamma = gamma
    self.theta = theta
    self.noise_level = noise_level
    self.n_sample = n_sample
    self.a_dim = a_dim
def __init__(
    self,
    s_dim,
    a_dim,
    h_dim,
    h_act=nn.ReLU,
    buffer_size=100000,
    batch_size=32,
    lr=1e-4,
    gamma=0.95,
    theta=0.01,
    *args,
    **kwargs
):
    """Create the online/target ``SimpleMLP`` pair, replay buffer and optimizer.

    Args:
        s_dim: Passed to ``SimpleMLP`` as ``in_dim``.
        a_dim: Passed to ``SimpleMLP`` as ``o_dim``; also stored on ``self.a_dim``.
        h_dim: Passed to ``SimpleMLP`` as ``h_dim``.
        h_act: Passed to ``SimpleMLP`` as ``h_act`` (defaults to ``nn.ReLU``).
        buffer_size: Sole argument given to ``ReplayBuffer``.
        batch_size: Stored on ``self.batch_size``.
        lr: Learning rate handed to ``Adam``.
        gamma: Stored on ``self.gamma`` (presumably the discount factor --
            confirm against the training code).
        theta: Stored on ``self.theta`` (presumably the target-update
            coefficient -- confirm against the training code).
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.
    """
    super(DQNAgent, self).__init__()

    # One shared configuration for both networks; online net is built first.
    mlp_config = dict(in_dim=s_dim, o_dim=a_dim, h_dim=h_dim, h_act=h_act)
    online_net = SimpleMLP(**mlp_config)
    frozen_net = SimpleMLP(**mlp_config)
    # Start the target network from the online network's weights.
    frozen_net.load_state_dict(online_net.state_dict())
    self.q_net = online_net
    self.target_net = frozen_net

    self.buffer = ReplayBuffer(buffer_size)
    # Only the online network's parameters are given to the optimizer.
    self.optimizer = Adam(online_net.parameters(), lr=lr)

    # Plain hyper-parameters kept for later use by the agent.
    self.batch_size = batch_size
    self.gamma = gamma
    self.theta = theta
    self.a_dim = a_dim
def __init__(
    self,
    s_dim,
    a_dim,
    h_dim,
    h_act=nn.ReLU,
    buffer_size=100000,
    batch_size=32,
    lr=1e-4,
    gamma=0.95,
    theta=0.01,
    n_model=5,
    *args,
    **kwargs
):
    """Create ``n_model`` online/target ``SimpleMLP`` heads with one optimizer each.

    Args:
        s_dim: Passed to every ``SimpleMLP`` as ``in_dim``.
        a_dim: Passed to every ``SimpleMLP`` as ``o_dim``; also stored on
            ``self.a_dim``.
        h_dim: Passed to every ``SimpleMLP`` as ``h_dim``.
        h_act: Passed to every ``SimpleMLP`` as ``h_act`` (defaults to
            ``nn.ReLU``).
        buffer_size: Sole argument given to ``ReplayBuffer``.
        batch_size: Stored on ``self.batch_size``.
        lr: Learning rate handed to each ``Adam`` instance.
        gamma: Stored on ``self.gamma`` (presumably the discount factor --
            confirm against the training code).
        theta: Stored on ``self.theta`` (presumably the target-update
            coefficient -- confirm against the training code).
        n_model: Number of online heads and of target heads; stored on
            ``self.n_model``.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.
    """
    super(BootDQNAgent, self).__init__()

    def _new_head():
        # Every head, online or target, uses the same architecture arguments.
        return SimpleMLP(in_dim=s_dim, o_dim=a_dim, h_dim=h_dim, h_act=h_act)

    # All online heads are created before any target head.
    online_heads = [_new_head() for _ in range(n_model)]
    frozen_heads = [_new_head() for _ in range(n_model)]
    self.q_nets = nn.ModuleList(online_heads)
    self.target_nets = nn.ModuleList(frozen_heads)
    # Start the target heads from the online heads' weights.
    self.target_nets.load_state_dict(self.q_nets.state_dict())

    self.buffer = ReplayBuffer(buffer_size)
    self.batch_size = batch_size

    # One independent optimizer per online head.
    per_head_optimizers = []
    for head in self.q_nets:
        per_head_optimizers.append(Adam(head.parameters(), lr=lr))
    self.optimizers = per_head_optimizers

    self.gamma = gamma
    self.theta = theta
    self.n_model = n_model
    # No head is selected at construction time.
    self.current_head = None
    self.a_dim = a_dim
def __init__(
    self,
    s_dim,
    a_dim,
    h_dim,
    h_act=nn.ReLU,
    buffer_size=100000,
    batch_size=32,
    lr=1e-4,
    gamma=0.95,
    theta=0.01,
    n_model=5,
    *args,
    **kwargs
):
    """Create the online/target ``EnModel`` ensembles, replay buffer and optimizers.

    Args:
        s_dim: Passed to ``EnModel`` as ``in_dim``.
        a_dim: Passed to ``EnModel`` as ``o_dim``; also stored on ``self.a_dim``.
        h_dim: Passed to ``EnModel`` as ``h_dim``.
        h_act: Passed to ``EnModel`` as ``h_act`` (defaults to ``nn.ReLU``).
        buffer_size: Sole argument given to ``ReplayBuffer``.
        batch_size: Stored on ``self.batch_size``.
        lr: Learning rate handed to each ``Adam`` instance.
        gamma: Stored on ``self.gamma`` (presumably the discount factor --
            confirm against the training code).
        theta: Stored on ``self.theta`` (presumably the target-update
            coefficient -- confirm against the training code).
        n_model: Passed to both ``EnModel`` instances as ``n_model`` and
            stored on ``self.n_model``.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.
    """
    super(EnDQNAgent, self).__init__()

    # Forward the caller's ``n_model`` so that ``self.n_model`` always agrees
    # with the size the ensembles were actually built with.
    self.q_nets = EnModel(
        in_dim=s_dim, o_dim=a_dim, h_dim=h_dim, h_act=h_act, n_model=n_model
    )
    self.target_nets = EnModel(
        in_dim=s_dim, o_dim=a_dim, h_dim=h_dim, h_act=h_act, n_model=n_model
    )
    # Start the target ensemble from the online ensemble's weights.
    self.target_nets.load_state_dict(self.q_nets.state_dict())

    self.buffer = ReplayBuffer(buffer_size)
    self.batch_size = batch_size

    # One optimizer per entry of ``q_nets.heads``; parameters of ``q_nets``
    # that live outside ``heads`` (if any) are not given to an optimizer here.
    self.optimizers = [
        Adam(head.parameters(), lr=lr) for head in self.q_nets.heads
    ]

    self.gamma = gamma
    self.theta = theta
    self.n_model = n_model
    self.a_dim = a_dim
def __init__(
    self,
    s_dim,
    a_dim,
    h_dim,
    h_act=nn.ReLU,
    buffer_size=100000,
    batch_size=32,
    lr=1e-4,
    gamma=0.95,
    theta=0.01,
    dropout=0.5,
    weight_decay=0.1,
    noise_level=None,
    n_sample=5,
    *args,
    **kwargs
):
    """Create the online/target ``MCDropout`` pair, replay buffer and optimizer.

    Args:
        s_dim: Passed to ``MCDropout`` as ``in_dim``.
        a_dim: Passed to ``MCDropout`` as ``o_dim``; also stored on ``self.a_dim``.
        h_dim: Passed to ``MCDropout`` as ``h_dim``.
        h_act: Passed to ``MCDropout`` as ``h_act`` (defaults to ``nn.ReLU``).
        buffer_size: Sole argument given to ``ReplayBuffer``.
        batch_size: Stored on ``self.batch_size``.
        lr: Learning rate handed to ``Adam``.
        gamma: Stored on ``self.gamma`` (presumably the discount factor --
            confirm against the training code).
        theta: Stored on ``self.theta`` (presumably the target-update
            coefficient -- confirm against the training code).
        dropout: Passed to ``MCDropout`` as ``dropout``.
        weight_decay: Passed to ``Adam`` as ``weight_decay``.
        noise_level: Passed to ``MCDropout`` and stored on ``self.noise_level``.
        n_sample: Stored on ``self.n_sample``.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.
    """
    super(DropDQNAgent, self).__init__()

    # Arguments common to both networks; the online net is built first.
    shared_config = dict(
        in_dim=s_dim,
        o_dim=a_dim,
        h_dim=h_dim,
        h_act=h_act,
        dropout=dropout,
        noise_level=noise_level,
    )
    online_net = MCDropout(**shared_config)
    # Only the target network is constructed with ``agent=False``.
    frozen_net = MCDropout(**shared_config, agent=False)
    # Start the target network from the online network's weights.
    frozen_net.load_state_dict(online_net.state_dict())
    self.q_net = online_net
    self.target_net = frozen_net

    self.buffer = ReplayBuffer(buffer_size)
    # Only the online network's parameters are given to the optimizer.
    self.optimizer = Adam(
        online_net.parameters(), lr=lr, weight_decay=weight_decay
    )

    # Plain hyper-parameters kept for later use by the agent.
    self.batch_size = batch_size
    self.gamma = gamma
    self.theta = theta
    self.noise_level = noise_level
    self.n_sample = n_sample
    self.a_dim = a_dim