def _init_agents(self):
    """Build the shared layers, their target copies, the per-agent
    learners, and one shared RMSprop optimizer over all parameters."""
    # Online shared layers (parameter sharing across agents).
    self.embedding = Embedding_Layer(self.input_dim,
                                     self.hidden_dim).to(self.device)
    self.rnn = RNN_Model(self.hidden_dim, self.num_agents).to(self.device)
    self.attention = Attention_Model(self.hidden_dim).to(self.device)
    # Target-side copies, synchronized from the online layers below.
    self.embedding_target = Embedding_Layer(self.input_dim,
                                            self.hidden_dim).to(self.device)
    self.rnn_target = RNN_Model(self.hidden_dim,
                                self.num_agents).to(self.device)
    self.attention_target = Attention_Model(self.hidden_dim).to(self.device)
    for online, target in ((self.embedding, self.embedding_target),
                           (self.rnn, self.rnn_target),
                           (self.attention, self.attention_target)):
        Dueling_DDQN_Learner.copy_network(online, target)
    # Iterator over parameters of the shared layers only.
    self.share_para = chain(self.embedding.parameters(),
                            self.attention.parameters(),
                            self.rnn.parameters())
    # Iterator over shared parameters plus each agent's Q-network parameters.
    self.all_para = chain(self.embedding.parameters(),
                          self.attention.parameters(),
                          self.rnn.parameters())
    for agent_idx in range(self.num_agents):
        self.agents.append(Dueling_DDQN_Learner(self.config))
        self.all_para = chain(
            self.all_para,
            self.agents[agent_idx].get_q_network().parameters())
    # A single optimizer drives shared and per-agent parameters together.
    self.share_optimizer = optim.RMSprop(self.all_para,
                                         lr=self.lr,
                                         weight_decay=1e-4)
    def _init_agents(self):
        """Create shared embedding/attention layers (plus target copies)
        and wire a Double_Attention_Model Q-network pair into each agent."""
        # Online shared layers.
        self.embedding = Embedding_Layer(self.input_dim,
                                         self.hidden_dim).to(self.device)
        self.attention = Attention_Model(self.hidden_dim).to(self.device)
        self.temporal_attention = Attention_Model(self.hidden_dim).to(
            self.device)
        # Target-side copies, synchronized from the online layers below.
        self.embedding_target = Embedding_Layer(
            self.input_dim, self.hidden_dim).to(self.device)
        self.attention_target = Attention_Model(self.hidden_dim).to(
            self.device)
        self.temporal_attention_target = Attention_Model(self.hidden_dim).to(
            self.device)
        for online, target in ((self.embedding, self.embedding_target),
                               (self.attention, self.attention_target),
                               (self.temporal_attention,
                                self.temporal_attention_target)):
            Dueling_DDQN_Learner.copy_network(online, target)
        for agent_idx in range(self.num_agents):
            # Each agent gets its own network pair that shares the layers
            # constructed above via set_layer_para.
            online_net = Double_Attention_Model(
                self.input_dim, self.output_dim,
                self.hidden_dim).to(self.device)
            target_net = Double_Attention_Model(
                self.input_dim, self.output_dim,
                self.hidden_dim).to(self.device)
            online_net.set_layer_para(self.embedding, self.attention,
                                      self.temporal_attention)
            target_net.set_layer_para(self.embedding_target,
                                      self.attention_target,
                                      self.temporal_attention_target)
            self.agents[agent_idx].set_q_network(online_net, target_net)


# def change_mode(self):
# self.q_network.change_mode()
# self.q_network_target.change_mode()
# for i in range(self.num_agents):
#     self.agents[i].q_network_current.change_mode()
#     self.agents[i].q_network_target.change_mode()
# self.embedding = Embedding_Layer(self.input_dim, self.hidden_dim[0])
# self.attention = Attention_Layer(self.hidden_dim[0], self.hidden_dim[1], self.hidden_dim[2])
# self.embedding_target = Embedding_Layer(self.input_dim, self.hidden_dim[0])
# self.attention_target = Attention_Layer(self.hidden_dim[0], self.hidden_dim[1], self.hidden_dim[2])
# Dueling_DDQN_Learner.copy_network(self.embedding, self.embedding_target)
# Dueling_DDQN_Learner.copy_network(self.attention, self.attention_target)
# def get_action(self, i, obs):
#     return self.agents[i].step(obs)
# def store_experience(self, i, obs, action, reward, next_obs, is_done):
#     self.agents[i].store_experience(obs, action, reward, next_obs, is_done)
# Example #3
 def __init__(self, input_dim, output_dim, hidden_dim):
     """Set up an embedding -> RNN -> attention pipeline with a linear
     head projecting the hidden state to ``output_dim`` values."""
     super().__init__()
     # Network dimensions.
     self.input_dim = input_dim
     self.output_dim = output_dim
     self.hidden_dim = hidden_dim
     # Sub-modules, in the order the data presumably flows through them.
     self.embedding = Embedding_Layer(self.input_dim, self.hidden_dim)
     self.rnn = RNN_Model(self.hidden_dim, self.hidden_dim)
     self.attention = Attention_Model(self.hidden_dim)
     self.relu = nn.ReLU()
     self.linear_out = nn.Linear(self.hidden_dim, self.output_dim)
     # Recurrent state placeholders; None until populated elsewhere.
     self.hidden = None
     self.cell = None
    def __init__(self, input_dim, output_dim, hidden_dim):
        """Set up an embedding -> attention pipeline with a ReLU and a
        linear head projecting to ``output_dim`` values."""
        super().__init__()
        # Network dimensions.
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        # Starts outside pre-training mode.
        self.pre_train = False
        # Prefer the first GPU when CUDA is available.
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        # Sub-modules, in the order the data presumably flows through them.
        self.embedding = Embedding_Layer(self.input_dim, self.hidden_dim)
        self.attention = Attention_Model(self.hidden_dim)
        self.relu = nn.ReLU()
        self.linear1 = nn.Linear(self.hidden_dim, self.output_dim)

        # Placeholder for attention weights (presumably set during forward).
        self.attention_score = None