Esempio n. 1
0
 def __init__(self,action_dim,state_dim,agentParam,useLaw,useCenCritc,num_agent,CNN=False, width=None, height=None, channel=None):
     """Build the actor/critic networks, their optimizers and LR schedulers.

     Args:
         action_dim: action dimension forwarded to the actor network.
         state_dim: input dimension of actor and critic; replaced by
             ``CNN_preprocessA.get_state_dim()`` when ``CNN`` is true.
         agentParam: mapping of agent settings; only ``"device"`` is read here.
         useLaw: if true the actor is an ``ActorLaw``, otherwise an ``Actor``.
         useCenCritc: if true the critic is a ``Centralised_Critic`` built
             with ``num_agent``, otherwise a plain ``Critic``.
         num_agent: forwarded to ``Centralised_Critic``.
         CNN: enable the CNN front ends (one for the actor, one for the critic).
         width, height, channel: forwarded to ``CNN_preprocess``.
     """
     self.CNN = CNN
     self.device = agentParam["device"]
     if CNN:
         # Fix: keep the CNN front ends on the same device as the actor and
         # critic they feed (assumes CNN_preprocess is a torch.nn.Module).
         self.CNN_preprocessA = CNN_preprocess(width,height,channel).to(self.device)
         self.CNN_preprocessC = CNN_preprocess(width,height,channel).to(self.device)
         state_dim = self.CNN_preprocessA.get_state_dim()

     actor_cls = ActorLaw if useLaw else Actor
     self.actor = actor_cls(action_dim,state_dim).to(self.device)
     if useCenCritc:
         self.critic = Centralised_Critic(state_dim,num_agent).to(self.device)
     else:
         self.critic = Critic(state_dim).to(self.device)

     self.action_dim = action_dim
     self.state_dim = state_dim
     # Exploration-noise schedule: constant_decay is multiplied by
     # noise_epsilon elsewhere in the class each time an action is sampled.
     self.noise_epsilon = 0.99
     self.constant_decay = 0.1

     # Build each optimizer exactly once. With a CNN front end its parameters
     # are trained jointly with the head, using a smaller actor learning rate
     # and a longer scheduler period.
     if CNN:
         actor_params = itertools.chain(self.CNN_preprocessA.parameters(),self.actor.parameters())
         critic_params = itertools.chain(self.CNN_preprocessC.parameters(),self.critic.parameters())
         actor_lr, critic_lr, step_size = 0.0001, 0.001, 10000
     else:
         actor_params = self.actor.parameters()
         critic_params = self.critic.parameters()
         actor_lr, critic_lr, step_size = 0.001, 0.001, 1000
     self.optimizerA = torch.optim.Adam(actor_params, lr=actor_lr)
     self.optimizerC = torch.optim.Adam(critic_params, lr=critic_lr)
     self.lr_scheduler = {"optA": torch.optim.lr_scheduler.StepLR(self.optimizerA, step_size=step_size, gamma=0.9, last_epoch=-1),
                          "optC": torch.optim.lr_scheduler.StepLR(self.optimizerC, step_size=step_size, gamma=0.9, last_epoch=-1)}
Esempio n. 2
0
 def __init__(self,
              action_dim,
              state_dim,
              CNN=False,
              width=None,
              height=None,
              channel=None,
              device='cpu'):
     """Build the actor/critic networks, their optimizers and LR schedulers.

     Args:
         action_dim: action dimension forwarded to ``Actor``.
         state_dim: input dimension of actor and critic; replaced by
             ``CNN_preprocessA.get_state_dim()`` when ``CNN`` is true.
         CNN: enable the CNN front ends (one for the actor, one for the critic).
         width, height, channel: forwarded to ``CNN_preprocess``.
         device: torch device the networks are placed on.
     """
     self.CNN = CNN
     self.device = device
     if CNN:
         # Fix: place the CNN front ends on the requested device (assumes
         # CNN_preprocess is a torch.nn.Module).
         self.CNN_preprocessA = CNN_preprocess(width, height,
                                               channel).to(self.device)
         self.CNN_preprocessC = CNN_preprocess(width, height,
                                               channel).to(self.device)
         state_dim = self.CNN_preprocessA.get_state_dim()
     # Fix: `device` used to be stored but never applied to the networks.
     # With the default 'cpu' this is a no-op.
     self.actor = Actor(action_dim, state_dim).to(self.device)
     self.critic = Critic(state_dim).to(self.device)
     self.action_dim = action_dim
     self.state_dim = state_dim
     # Exploration-noise schedule: constant_decay is multiplied by
     # noise_epsilon elsewhere in the class each time an action is sampled.
     self.noise_epsilon = 0.999
     self.constant_decay = 1

     # Build each optimizer exactly once. With a CNN front end its parameters
     # are trained jointly with the head, with different learning rates and a
     # longer scheduler period.
     if CNN:
         actor_params = itertools.chain(self.CNN_preprocessA.parameters(),
                                        self.actor.parameters())
         critic_params = itertools.chain(self.CNN_preprocessC.parameters(),
                                         self.critic.parameters())
         actor_lr, critic_lr, step_size = 0.0001, 0.001, 10000
     else:
         actor_params = self.actor.parameters()
         critic_params = self.critic.parameters()
         actor_lr, critic_lr, step_size = 0.00001, 0.01, 1000
     self.optimizerA = torch.optim.Adam(actor_params, lr=actor_lr)
     self.optimizerC = torch.optim.Adam(critic_params, lr=critic_lr)
     self.lr_scheduler = {
         # gamma=1 leaves the actor learning rate constant.
         "optA":
         torch.optim.lr_scheduler.StepLR(self.optimizerA,
                                         step_size=step_size,
                                         gamma=1,
                                         last_epoch=-1),
         "optC":
         torch.optim.lr_scheduler.StepLR(self.optimizerC,
                                         step_size=step_size,
                                         gamma=0.9,
                                         last_epoch=-1)
     }
Esempio n. 3
0
class IAC():
    """Actor-critic agent with optional CNN front ends and centralised critic.

    The actor outputs action probabilities that are perturbed with decaying
    non-negative noise before sampling. The critic is trained on the squared
    one-step TD error and the actor on ``-log pi(a) * td_error``.
    """

    # Discount factor applied to the bootstrapped next-state value.
    GAMMA = 0.9
    # Scale of the exploration noise added to the actor output.
    NOISE_SCALE = 0.05
    # Shape observations are reshaped to before entering a CNN front end.
    # NOTE(review): hard-coded and independent of the width/height/channel
    # passed to __init__ -- confirm the axis order before generalising.
    CNN_INPUT_SHAPE = (1, 3, 15, 15)

    def __init__(self,action_dim,state_dim,agentParam,useLaw,useCenCritc,num_agent,CNN=False, width=None, height=None, channel=None):
        """Build the networks, their optimizers and LR schedulers.

        Args:
            action_dim: number of entries in the actor's output.
            state_dim: input dimension of actor and critic; replaced by
                ``CNN_preprocessA.get_state_dim()`` when ``CNN`` is true.
            agentParam: mapping of agent settings; only ``"device"`` is read.
            useLaw: if true the actor is an ``ActorLaw``, otherwise ``Actor``.
            useCenCritc: if true the critic is a ``Centralised_Critic`` built
                with ``num_agent``, otherwise a plain ``Critic``.
            num_agent: forwarded to ``Centralised_Critic``.
            CNN: enable the CNN front ends (one for actor, one for critic).
            width, height, channel: forwarded to ``CNN_preprocess``.
        """
        self.CNN = CNN
        self.device = agentParam["device"]
        if CNN:
            # Fix: inputs are moved to self.device before being fed to the
            # CNN front ends, so the front ends must live there as well
            # (assumes CNN_preprocess is a torch.nn.Module).
            self.CNN_preprocessA = CNN_preprocess(width,height,channel).to(self.device)
            self.CNN_preprocessC = CNN_preprocess(width,height,channel).to(self.device)
            state_dim = self.CNN_preprocessA.get_state_dim()

        actor_cls = ActorLaw if useLaw else Actor
        self.actor = actor_cls(action_dim,state_dim).to(self.device)
        if useCenCritc:
            self.critic = Centralised_Critic(state_dim,num_agent).to(self.device)
        else:
            self.critic = Critic(state_dim).to(self.device)

        self.action_dim = action_dim
        self.state_dim = state_dim
        # constant_decay scales the exploration noise and is multiplied by
        # noise_epsilon every time an action is sampled.
        self.noise_epsilon = 0.99
        self.constant_decay = 0.1

        # Build each optimizer exactly once. With a CNN front end its
        # parameters are trained jointly with the head.
        if CNN:
            actor_params = itertools.chain(self.CNN_preprocessA.parameters(),self.actor.parameters())
            critic_params = itertools.chain(self.CNN_preprocessC.parameters(),self.critic.parameters())
            actor_lr, critic_lr, step_size = 0.0001, 0.001, 10000
        else:
            actor_params = self.actor.parameters()
            critic_params = self.critic.parameters()
            actor_lr, critic_lr, step_size = 0.001, 0.001, 1000
        self.optimizerA = torch.optim.Adam(actor_params, lr=actor_lr)
        self.optimizerC = torch.optim.Adam(critic_params, lr=critic_lr)
        self.lr_scheduler = {"optA": torch.optim.lr_scheduler.StepLR(self.optimizerA, step_size=step_size, gamma=0.9, last_epoch=-1),
                             "optC": torch.optim.lr_scheduler.StepLR(self.optimizerC, step_size=step_size, gamma=0.9, last_epoch=-1)}

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _as_batch(self, x):
        """Convert ``x`` to a float tensor with a leading batch axis on the device."""
        return torch.Tensor(x).unsqueeze(0).to(self.device)

    def _sample_noisy(self, probs):
        """Perturb ``probs`` with decaying noise, renormalise, store and sample."""
        noise = torch.abs(torch.randn(self.action_dim)*self.NOISE_SCALE*self.constant_decay).to(self.device)
        self.constant_decay = self.constant_decay*self.noise_epsilon
        noisy = probs + noise
        # The normaliser is detached so gradients flow only through the numerator.
        self.act_prob = noisy/torch.sum(noisy).detach()
        return torch.distributions.Categorical(self.act_prob).sample()

    def _critic_step(self, td_err):
        """Take one optimizer and scheduler step on the squared TD error."""
        loss = torch.mul(td_err,td_err)
        self.optimizerC.zero_grad()
        loss.backward()
        self.optimizerC.step()
        self.lr_scheduler["optC"].step()

    def _actor_step(self, td_err, a):
        """Take one policy-gradient step using the probabilities from the last sample."""
        # self.act_prob is set by the choose_* methods; [0] strips the batch axis.
        log_prob = torch.log(self.act_prob[0][a])
        loss = -torch.mean(log_prob*td_err.detach())
        self.optimizerA.zero_grad()
        loss.backward()
        self.optimizerA.step()
        self.lr_scheduler["optA"].step()

    # ------------------------------------------------------------------
    # Acting
    # ------------------------------------------------------------------
    def choose_action(self,s):
        """Sample an action for observation ``s`` and remember its probabilities.

        Returns:
            The sampled action index as a tensor.
        """
        s = self._as_batch(s)
        if self.CNN:
            s = self.CNN_preprocessA(s.reshape(self.CNN_INPUT_SHAPE))
        return self._sample_noisy(self.actor(s))

    def choose_act_prob(self,s):
        """Return the detached actor output for ``s`` without noise or sampling.

        NOTE(review): unlike ``choose_action`` this does not apply the CNN
        front end even when ``self.CNN`` is true -- confirm this is intended.
        """
        s = self._as_batch(s)
        self.act_prob = self.actor(s,[],False)
        return self.act_prob.detach()

    def choose_mask_action(self,s,pi):
        """Like ``choose_action`` but calls the actor as ``actor(s, pi, True)``."""
        s = self._as_batch(s)
        if self.CNN:
            s = self.CNN_preprocessA(s.reshape(self.CNN_INPUT_SHAPE))
        return self._sample_noisy(self.actor(s,pi,True))

    # ------------------------------------------------------------------
    # TD errors
    # ------------------------------------------------------------------
    def cal_tderr(self,s,r,s_,A_or_C=None):
        """Return the one-step TD error ``r + GAMMA * V(s_) - V(s)``.

        Args:
            s: current observation.
            r: reward.
            s_: next observation.
            A_or_C: with CNN enabled, ``'A'`` selects the actor's front end
                and anything else selects the critic's.

        The bootstrap value ``V(s_)`` is detached from the graph.
        """
        s = self._as_batch(s)
        s_ = self._as_batch(s_)
        if self.CNN:
            cnn = self.CNN_preprocessA if A_or_C == 'A' else self.CNN_preprocessC
            s = cnn(s.reshape(self.CNN_INPUT_SHAPE))
            s_ = cnn(s_.reshape(self.CNN_INPUT_SHAPE))
        v_ = self.critic(s_).detach()
        v = self.critic(s)
        return r + self.GAMMA*v_ - v

    def td_err_sn(self, s_n, r, s_n_):
        """Return the TD error of the critic on flattened inputs ``s_n`` / ``s_n_``.

        Both inputs are flattened to a single row and given a batch axis.
        """
        s = torch.Tensor(s_n).reshape((1,-1)).unsqueeze(0).to(self.device)
        s_ = torch.Tensor(s_n_).reshape((1,-1)).unsqueeze(0).to(self.device)
        v = self.critic(s)
        v_ = self.critic(s_).detach()
        return r + self.GAMMA*v_ - v

    # ------------------------------------------------------------------
    # Learning
    # ------------------------------------------------------------------
    def LearnCenCritic(self, s_n, r, s_n_):
        """Update the critic on the TD error from ``td_err_sn``."""
        self._critic_step(self.td_err_sn(s_n,r,s_n_))

    def learnCenActor(self,s_n,r,s_n_,a):
        """Update the actor for action ``a`` using the TD error from ``td_err_sn``."""
        self._actor_step(self.td_err_sn(s_n,r,s_n_), a)

    def learnCritic(self,s,r,s_):
        """Update the critic on the TD error from ``cal_tderr``."""
        self._critic_step(self.cal_tderr(s,r,s_))

    def learnActor(self,s,r,s_,a):
        """Update the actor for action ``a`` using the TD error from ``cal_tderr``."""
        self._actor_step(self.cal_tderr(s,r,s_), a)

    def update_cent(self,s,r,s_,a,s_n,s_n_):
        """Critic then actor update from ``s_n`` / ``s_n_`` (``s`` and ``s_`` are unused)."""
        self.LearnCenCritic(s_n,r,s_n_)
        self.learnCenActor(s_n,r,s_n_,a)

    def update(self,s,r,s_,a):
        """Critic then actor update from the agent's own observations."""
        self.learnCritic(s,r,s_)
        self.learnActor(s,r,s_,a)
Esempio n. 4
0
class IAC():
    """Actor-critic agent with optional CNN front ends.

    The critic is trained on the squared one-step TD error and the actor on
    ``-log pi(a) * td_error`` using the probabilities stored by the most
    recent ``choose_action`` call.
    """

    # Discount factor applied to the bootstrapped next-state value.
    GAMMA = 0.9
    # Scale of the exploration noise; 0.0 means the noise term is all zeros.
    NOISE_SCALE = 0.
    # Shape observations are reshaped to before entering a CNN front end.
    # NOTE(review): hard-coded and independent of the width/height/channel
    # passed to __init__ -- confirm the axis order before generalising.
    CNN_INPUT_SHAPE = (1, 3, 15, 15)

    def __init__(self,
                 action_dim,
                 state_dim,
                 CNN=False,
                 width=None,
                 height=None,
                 channel=None,
                 device='cpu'):
        """Build the networks, their optimizers and LR schedulers.

        Args:
            action_dim: number of entries in the actor's output.
            state_dim: input dimension of actor and critic; replaced by
                ``CNN_preprocessA.get_state_dim()`` when ``CNN`` is true.
            CNN: enable the CNN front ends (one for actor, one for critic).
            width, height, channel: forwarded to ``CNN_preprocess``.
            device: torch device the networks and inputs are placed on.
        """
        self.CNN = CNN
        self.device = device
        if CNN:
            # Fix: inputs are moved to self.device before entering the CNN
            # front ends, so the front ends must live there as well (assumes
            # CNN_preprocess is a torch.nn.Module).
            self.CNN_preprocessA = CNN_preprocess(width, height,
                                                  channel).to(self.device)
            self.CNN_preprocessC = CNN_preprocess(width, height,
                                                  channel).to(self.device)
            state_dim = self.CNN_preprocessA.get_state_dim()
        # Fix: `device` used to be stored but never applied to the networks.
        # With the default 'cpu' this is a no-op.
        self.actor = Actor(action_dim, state_dim).to(self.device)
        self.critic = Critic(state_dim).to(self.device)
        self.action_dim = action_dim
        self.state_dim = state_dim
        # constant_decay scales the exploration noise and is multiplied by
        # noise_epsilon every time an action is sampled.
        self.noise_epsilon = 0.999
        self.constant_decay = 1

        # Build each optimizer exactly once. With a CNN front end its
        # parameters are trained jointly with the head.
        if CNN:
            actor_params = itertools.chain(self.CNN_preprocessA.parameters(),
                                           self.actor.parameters())
            critic_params = itertools.chain(self.CNN_preprocessC.parameters(),
                                            self.critic.parameters())
            actor_lr, critic_lr, step_size = 0.0001, 0.001, 10000
        else:
            actor_params = self.actor.parameters()
            critic_params = self.critic.parameters()
            actor_lr, critic_lr, step_size = 0.00001, 0.01, 1000
        self.optimizerA = torch.optim.Adam(actor_params, lr=actor_lr)
        self.optimizerC = torch.optim.Adam(critic_params, lr=critic_lr)
        self.lr_scheduler = {
            # gamma=1 leaves the actor learning rate constant.
            "optA":
            torch.optim.lr_scheduler.StepLR(self.optimizerA,
                                            step_size=step_size,
                                            gamma=1,
                                            last_epoch=-1),
            "optC":
            torch.optim.lr_scheduler.StepLR(self.optimizerC,
                                            step_size=step_size,
                                            gamma=0.9,
                                            last_epoch=-1)
        }

    def _as_batch(self, x):
        """Convert ``x`` to a float tensor with a leading batch axis on the device."""
        return torch.Tensor(x).unsqueeze(0).to(self.device)

    def choose_action(self, s):
        """Sample an action for observation ``s`` and remember its probabilities.

        Returns:
            The sampled action index as a tensor.
        """
        s = self._as_batch(s)
        if self.CNN:
            s = self.CNN_preprocessA(s.reshape(self.CNN_INPUT_SHAPE))
        probs = self.actor(s)
        # The random draw is kept even though NOISE_SCALE is zero so the RNG
        # stream (and therefore seeded sampling) is unchanged.
        # Fix: the noise is moved to self.device so the addition also works
        # off-CPU.
        noise = torch.abs(
            torch.randn(self.action_dim) * self.NOISE_SCALE *
            self.constant_decay).to(self.device)
        self.constant_decay = self.constant_decay * self.noise_epsilon
        noisy = probs + noise
        # The normaliser is detached so gradients flow only through the numerator.
        self.act_prob = noisy / torch.sum(noisy).detach()
        return torch.distributions.Categorical(self.act_prob).sample()

    def cal_tderr(self, s, r, s_):
        """Return the one-step TD error ``r + GAMMA * V(s_) - V(s)``.

        The bootstrap value ``V(s_)`` is detached from the graph. With CNN
        enabled both observations go through the critic's front end.
        """
        s = self._as_batch(s)
        s_ = self._as_batch(s_)
        if self.CNN:
            s = self.CNN_preprocessC(s.reshape(self.CNN_INPUT_SHAPE))
            s_ = self.CNN_preprocessC(s_.reshape(self.CNN_INPUT_SHAPE))
        v_ = self.critic(s_).detach()
        v = self.critic(s)
        return r + self.GAMMA * v_ - v

    def learnCritic(self, s, r, s_):
        """Take one critic optimizer and scheduler step on the squared TD error."""
        loss = torch.square(self.cal_tderr(s, r, s_))
        self.optimizerC.zero_grad()
        loss.backward()
        self.optimizerC.step()
        self.lr_scheduler["optC"].step()

    def learnActor(self, s, r, s_, a):
        """Take one policy-gradient step for action ``a``.

        Uses ``self.act_prob`` as stored by the last ``choose_action`` call.
        """
        td_err = self.cal_tderr(s, r, s_)
        # Original author's note: in cleanup there should not be a [0],
        # in IAC the [0] is necessary (it strips the batch axis).
        log_prob = torch.log(self.act_prob[0][a]).to(self.device)
        loss = -torch.mean(log_prob * td_err.detach())
        self.optimizerA.zero_grad()
        loss.backward()
        self.optimizerA.step()
        self.lr_scheduler["optA"].step()

    def update(self, s, r, s_, a):
        """Run one critic update followed by one actor update."""
        self.learnCritic(s, r, s_)
        self.learnActor(s, r, s_, a)