Exemple #1
0
    def __init__(self,
                 s_dim,
                 a_dim,
                 h_dim,
                 h_act=nn.ReLU,
                 buffer_size=100000,
                 batch_size=32,
                 lr=1e-4,
                 gamma=0.95,
                 theta=0.01,
                 noise_level=None,
                 n_sample=5,
                 *args,
                 **kwargs):
        """Create the online/target ``BayesNet`` pair, replay buffer and optimizer.

        Args:
            s_dim: Forwarded to ``BayesNet`` as ``in_dim``.
            a_dim: Forwarded to ``BayesNet`` as ``o_dim``; also kept as
                ``self.a_dim``.
            h_dim: Forwarded to ``BayesNet`` as ``h_dim``.
            h_act: Forwarded to ``BayesNet`` as ``h_act``.
            buffer_size: Sole argument given to ``ReplayBuffer``.
            batch_size: Kept as ``self.batch_size``.
            lr: Passed as ``lr`` to the ``Adam`` optimizer over ``q_net``.
            gamma: Kept as ``self.gamma`` (presumably the discount factor;
                confirm against the update code).
            theta: Kept as ``self.theta`` (presumably a target-update rate;
                confirm against the update code).
            noise_level: Forwarded to both networks and kept on ``self``.
            n_sample: Kept as ``self.n_sample``.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.
        """
        super(B3DQNAgent, self).__init__()

        # Both networks are built from one shared set of keyword arguments.
        net_config = dict(in_dim=s_dim,
                          o_dim=a_dim,
                          h_dim=h_dim,
                          h_act=h_act,
                          noise_level=noise_level)
        self.q_net = BayesNet(**net_config)
        self.target_net = BayesNet(**net_config)

        # The target network starts as an exact copy of the online network.
        online_state = self.q_net.state_dict()
        self.target_net.load_state_dict(online_state)

        self.buffer = ReplayBuffer(buffer_size)
        self.batch_size = batch_size

        # Only the online network's parameters are handed to the optimizer.
        self.optimizer = Adam(self.q_net.parameters(), lr=lr)

        self.gamma, self.theta = gamma, theta
        self.noise_level, self.n_sample = noise_level, n_sample
        self.a_dim = a_dim
Exemple #2
0
 def __init__(self,
              s_dim,
              a_dim,
              h_dim,
              h_act=nn.ReLU,
              buffer_size=100000,
              batch_size=32,
              lr=1e-4,
              gamma=0.95,
              theta=0.01,
              *args,
              **kwargs):
     """Create the online/target ``SimpleMLP`` pair, replay buffer and optimizer.

     Args:
         s_dim: Forwarded to ``SimpleMLP`` as ``in_dim``.
         a_dim: Forwarded to ``SimpleMLP`` as ``o_dim``; also kept as
             ``self.a_dim``.
         h_dim: Forwarded to ``SimpleMLP`` as ``h_dim``.
         h_act: Forwarded to ``SimpleMLP`` as ``h_act``.
         buffer_size: Sole argument given to ``ReplayBuffer``.
         batch_size: Kept as ``self.batch_size``.
         lr: Passed as ``lr`` to the ``Adam`` optimizer over ``q_net``.
         gamma: Kept as ``self.gamma`` (presumably the discount factor;
             confirm against the update code).
         theta: Kept as ``self.theta`` (presumably a target-update rate;
             confirm against the update code).
         *args: Accepted and ignored.
         **kwargs: Accepted and ignored.
     """
     super(DQNAgent, self).__init__()

     def make_network():
         # Each call yields a fresh network with the same configuration.
         return SimpleMLP(in_dim=s_dim,
                          o_dim=a_dim,
                          h_dim=h_dim,
                          h_act=h_act)

     self.q_net = make_network()
     self.target_net = make_network()

     # The target network starts as an exact copy of the online network.
     online_state = self.q_net.state_dict()
     self.target_net.load_state_dict(online_state)

     self.buffer = ReplayBuffer(buffer_size)
     self.batch_size = batch_size

     # Only the online network's parameters are handed to the optimizer.
     self.optimizer = Adam(self.q_net.parameters(), lr=lr)

     self.gamma, self.theta = gamma, theta
     self.a_dim = a_dim
Exemple #3
0
 def __init__(self,
              s_dim,
              a_dim,
              h_dim,
              h_act=nn.ReLU,
              buffer_size=100000,
              batch_size=32,
              lr=1e-4,
              gamma=0.95,
              theta=0.01,
              n_model=5,
              *args,
              **kwargs):
     """Create ``n_model`` online/target ``SimpleMLP`` pairs with one optimizer each.

     Args:
         s_dim: Forwarded to every ``SimpleMLP`` as ``in_dim``.
         a_dim: Forwarded to every ``SimpleMLP`` as ``o_dim``; also kept
             as ``self.a_dim``.
         h_dim: Forwarded to every ``SimpleMLP`` as ``h_dim``.
         h_act: Forwarded to every ``SimpleMLP`` as ``h_act``.
         buffer_size: Sole argument given to ``ReplayBuffer``.
         batch_size: Kept as ``self.batch_size``.
         lr: Passed as ``lr`` to each per-network ``Adam`` optimizer.
         gamma: Kept as ``self.gamma`` (presumably the discount factor;
             confirm against the update code).
         theta: Kept as ``self.theta`` (presumably a target-update rate;
             confirm against the update code).
         n_model: Number of online networks (and of target networks).
         *args: Accepted and ignored.
         **kwargs: Accepted and ignored.
     """
     super(BootDQNAgent, self).__init__()

     def build_member():
         # Each call yields a fresh network with the same configuration.
         return SimpleMLP(in_dim=s_dim, o_dim=a_dim, h_dim=h_dim, h_act=h_act)

     # All online networks are created before any target network.
     self.q_nets = nn.ModuleList([build_member() for _ in range(n_model)])
     self.target_nets = nn.ModuleList(
         [build_member() for _ in range(n_model)])

     # Targets start as exact copies of their online counterparts.
     online_state = self.q_nets.state_dict()
     self.target_nets.load_state_dict(online_state)

     self.buffer = ReplayBuffer(buffer_size)
     self.batch_size = batch_size

     # One independent optimizer per online network.
     per_net_optimizers = []
     for member in self.q_nets:
         per_net_optimizers.append(Adam(member.parameters(), lr=lr))
     self.optimizers = per_net_optimizers

     self.gamma, self.theta = gamma, theta
     self.n_model = n_model
     # No head is selected at construction time.
     self.current_head = None
     self.a_dim = a_dim
Exemple #4
0
    def __init__(self,
                 s_dim,
                 a_dim,
                 h_dim,
                 h_act=nn.ReLU,
                 buffer_size=100000,
                 batch_size=32,
                 lr=1e-4,
                 gamma=0.95,
                 theta=0.01,
                 n_model=5,
                 *args,
                 **kwargs):
        """Create the online/target ``EnModel`` pair with one optimizer per head.

        Args:
            s_dim: Forwarded to ``EnModel`` as ``in_dim``.
            a_dim: Forwarded to ``EnModel`` as ``o_dim``; also kept as
                ``self.a_dim``.
            h_dim: Forwarded to ``EnModel`` as ``h_dim``.
            h_act: Forwarded to ``EnModel`` as ``h_act``.
            buffer_size: Sole argument given to ``ReplayBuffer``.
            batch_size: Kept as ``self.batch_size``.
            lr: Passed as ``lr`` to each per-head ``Adam`` optimizer.
            gamma: Kept as ``self.gamma`` (presumably the discount factor;
                confirm against the update code).
            theta: Kept as ``self.theta`` (presumably a target-update rate;
                confirm against the update code).
            n_model: Forwarded to ``EnModel`` as ``n_model`` and kept as
                ``self.n_model``.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.
        """
        super(EnDQNAgent, self).__init__()
        # Forward the caller's ``n_model`` instead of a hard-coded 5 so the
        # ensemble size always agrees with ``self.n_model``.
        self.q_nets = EnModel(in_dim=s_dim,
                              o_dim=a_dim,
                              h_dim=h_dim,
                              h_act=h_act,
                              n_model=n_model)
        self.target_nets = EnModel(in_dim=s_dim,
                                   o_dim=a_dim,
                                   h_dim=h_dim,
                                   h_act=h_act,
                                   n_model=n_model)
        # The target ensemble starts as an exact copy of the online ensemble.
        self.target_nets.load_state_dict(self.q_nets.state_dict())
        self.buffer = ReplayBuffer(buffer_size)
        self.batch_size = batch_size

        # One independent optimizer per entry of the online model's ``heads``.
        self.optimizers = [
            Adam(head.parameters(), lr=lr) for head in self.q_nets.heads
        ]
        self.gamma = gamma
        self.theta = theta
        self.n_model = n_model
        self.a_dim = a_dim
Exemple #5
0
    def __init__(self,
                 s_dim,
                 a_dim,
                 h_dim,
                 h_act=nn.ReLU,
                 buffer_size=100000,
                 batch_size=32,
                 lr=1e-4,
                 gamma=0.95,
                 theta=0.01,
                 dropout=0.5,
                 weight_decay=0.1,
                 noise_level=None,
                 n_sample=5,
                 *args,
                 **kwargs):
        """Create the online/target ``MCDropout`` pair, replay buffer and optimizer.

        Args:
            s_dim: Forwarded to ``MCDropout`` as ``in_dim``.
            a_dim: Forwarded to ``MCDropout`` as ``o_dim``; also kept as
                ``self.a_dim``.
            h_dim: Forwarded to ``MCDropout`` as ``h_dim``.
            h_act: Forwarded to ``MCDropout`` as ``h_act``.
            buffer_size: Sole argument given to ``ReplayBuffer``.
            batch_size: Kept as ``self.batch_size``.
            lr: Passed as ``lr`` to the ``Adam`` optimizer over ``q_net``.
            gamma: Kept as ``self.gamma`` (presumably the discount factor;
                confirm against the update code).
            theta: Kept as ``self.theta`` (presumably a target-update rate;
                confirm against the update code).
            dropout: Forwarded to both networks as ``dropout``.
            weight_decay: Passed as ``weight_decay`` to ``Adam``.
            noise_level: Forwarded to both networks and kept on ``self``.
            n_sample: Kept as ``self.n_sample``.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.
        """
        super(DropDQNAgent, self).__init__()

        # Keyword arguments common to the online and the target network.
        shared_config = dict(in_dim=s_dim,
                             o_dim=a_dim,
                             h_dim=h_dim,
                             h_act=h_act,
                             dropout=dropout,
                             noise_level=noise_level)
        self.q_net = MCDropout(**shared_config)
        # Only the target network is built with ``agent=False``.
        self.target_net = MCDropout(agent=False, **shared_config)

        # The target network starts as an exact copy of the online network.
        online_state = self.q_net.state_dict()
        self.target_net.load_state_dict(online_state)

        self.buffer = ReplayBuffer(buffer_size)
        self.batch_size = batch_size

        # Only the online network's parameters are handed to the optimizer.
        trainable = self.q_net.parameters()
        self.optimizer = Adam(trainable, lr=lr, weight_decay=weight_decay)

        self.gamma, self.theta = gamma, theta
        self.noise_level, self.n_sample = noise_level, n_sample
        self.a_dim = a_dim