def _init_agents(self):
    """Build the parameter-shared encoder stack (embedding, RNN, attention),
    its target copies, one Dueling-DDQN learner per agent, and a single
    RMSprop optimizer over all shared + per-agent Q-network parameters.
    """
    # Online (trained) shared layers, moved to the configured device.
    self.embedding = Embedding_Layer(self.input_dim, self.hidden_dim).to(self.device)
    self.rnn = RNN_Model(self.hidden_dim, self.num_agents).to(self.device)
    self.attention = Attention_Model(self.hidden_dim).to(self.device)

    # Target copies used for bootstrapped Q-value targets.
    self.embedding_target = Embedding_Layer(self.input_dim, self.hidden_dim).to(self.device)
    self.rnn_target = RNN_Model(self.hidden_dim, self.num_agents).to(self.device)
    self.attention_target = Attention_Model(self.hidden_dim).to(self.device)

    # Start each target off identical to its online counterpart.
    Dueling_DDQN_Learner.copy_network(self.embedding, self.embedding_target)
    Dueling_DDQN_Learner.copy_network(self.rnn, self.rnn_target)
    Dueling_DDQN_Learner.copy_network(self.attention, self.attention_target)

    self.share_para = chain(self.embedding.parameters(),
                            self.attention.parameters(),
                            self.rnn.parameters())

    # Accumulate shared-layer parameters plus every agent's Q-network
    # parameters into one iterable for the shared optimizer.
    gathered = chain(self.embedding.parameters(),
                     self.attention.parameters(),
                     self.rnn.parameters())
    for _ in range(self.num_agents):
        learner = Dueling_DDQN_Learner(self.config)
        self.agents.append(learner)
        gathered = chain(gathered, learner.get_q_network().parameters())
    self.all_para = gathered

    # NOTE(review): share_para/all_para are one-shot `chain` iterators;
    # the optimizer consumes all_para here, leaving it exhausted — confirm
    # no later code tries to iterate these attributes again.
    self.share_optimizer = optim.RMSprop(self.all_para, lr=self.lr,
                                         weight_decay=1e-4)
def _init_agents(self):
    """Create the shared embedding / spatial-attention / temporal-attention
    layers (online and target versions) and wire them into a fresh pair of
    Double_Attention_Model Q-networks for every agent.
    """
    # Online shared layers.
    self.embedding = Embedding_Layer(self.input_dim, self.hidden_dim).to(self.device)
    self.attention = Attention_Model(self.hidden_dim).to(self.device)
    self.temporal_attention = Attention_Model(self.hidden_dim).to(self.device)

    # Target shared layers.
    self.embedding_target = Embedding_Layer(self.input_dim, self.hidden_dim).to(self.device)
    self.attention_target = Attention_Model(self.hidden_dim).to(self.device)
    self.temporal_attention_target = Attention_Model(self.hidden_dim).to(self.device)

    # Initialize every target to match its online counterpart.
    for online, target in ((self.embedding, self.embedding_target),
                           (self.attention, self.attention_target),
                           (self.temporal_attention, self.temporal_attention_target)):
        Dueling_DDQN_Learner.copy_network(online, target)

    # Each agent owns its own Q-network pair, but all pairs share the
    # layer parameters created above via set_layer_para.
    for idx in range(self.num_agents):
        q_net = Double_Attention_Model(self.input_dim, self.output_dim,
                                       self.hidden_dim).to(self.device)
        q_net_target = Double_Attention_Model(self.input_dim, self.output_dim,
                                              self.hidden_dim).to(self.device)
        q_net.set_layer_para(self.embedding, self.attention,
                             self.temporal_attention)
        q_net_target.set_layer_para(self.embedding_target,
                                    self.attention_target,
                                    self.temporal_attention_target)
        # NOTE(review): assumes self.agents is already populated with
        # num_agents learners before this runs — confirm against caller.
        self.agents[idx].set_q_network(q_net, q_net_target)

# Commented-out code preserved from the original source:
# def change_mode(self):
#     self.q_network.change_mode()
#     self.q_network_target.change_mode()
#     for i in range(self.num_agents):
#         self.agents[i].q_network_current.change_mode()
#         self.agents[i].q_network_target.change_mode()
#     self.embedding = Embedding_Layer(self.input_dim, self.hidden_dim[0])
#     self.attention = Attention_Layer(self.hidden_dim[0], self.hidden_dim[1], self.hidden_dim[2])
#     self.embedding_target = Embedding_Layer(self.input_dim, self.hidden_dim[0])
#     self.attention_target = Attention_Layer(self.hidden_dim[0], self.hidden_dim[1], self.hidden_dim[2])
#     Dueling_DDQN_Learner.copy_network(self.embedding, self.embedding_target)
#     Dueling_DDQN_Learner.copy_network(self.attention, self.attention_target)

# def get_action(self, i, obs):
#     return self.agents[i].step(obs)

# def store_experience(self, i, obs, action, reward, next_obs, is_done):
#     self.agents[i].store_experience(obs, action, reward, next_obs, is_done)
def __init__(self, input_dim, output_dim, hidden_dim):
    """Recurrent attention Q-network: embedding -> RNN -> attention -> linear head.

    Args:
        input_dim: size of each observation vector fed to the embedding.
        output_dim: number of output units of the final linear layer.
        hidden_dim: width shared by the embedding, RNN, and attention layers.
    """
    super().__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.hidden_dim = hidden_dim

    # Sub-modules; all operate at hidden_dim width.
    self.embedding = Embedding_Layer(self.input_dim, self.hidden_dim)
    self.rnn = RNN_Model(self.hidden_dim, self.hidden_dim)
    self.attention = Attention_Model(self.hidden_dim)
    self.relu = nn.ReLU()
    self.linear_out = nn.Linear(self.hidden_dim, self.output_dim)

    # Recurrent state placeholders — presumably filled in during the
    # forward pass; confirm against the forward implementation.
    self.hidden, self.cell = None, None
def __init__(self, input_dim, output_dim, hidden_dim):
    """Attention Q-network head: embedding -> attention -> ReLU -> linear output.

    Args:
        input_dim: size of each observation vector fed to the embedding.
        output_dim: number of output units of the final linear layer.
        hidden_dim: width shared by the embedding and attention layers.
    """
    super().__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.hidden_dim = hidden_dim
    self.pre_train = False
    # Prefer GPU 0 when CUDA is available, otherwise fall back to CPU.
    self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # Sub-modules; the embedding projects input_dim up to hidden_dim.
    self.embedding = Embedding_Layer(self.input_dim, self.hidden_dim)
    self.attention = Attention_Model(self.hidden_dim)
    self.relu = nn.ReLU()
    self.linear1 = nn.Linear(self.hidden_dim, self.output_dim)

    # Holds the most recent attention weights — presumably written during
    # the forward pass for later inspection; confirm.
    self.attention_score = None