Example 1
# Standard-library and PyTorch imports needed by this excerpt; the project-local
# modules (CNS, PPOModel, Work_info) and the module-level constants
# `learning_rate` and `gamma` are assumed to be defined elsewhere in the repository.
import copy
import time
from collections import deque

import torch
import torch.nn as nn
import torch.optim as optim
import torch.multiprocessing as mp

class Agent(mp.Process):
    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        mp.Process.__init__(self)
        # Network info
        self.GlobalNet = GlobalNet
        self.LocalNet = PPOModel(nub_para=2, time_leg=10)
        self.LocalNet.load_state_dict(GlobalNet.state_dict())
        self.optimizer = optim.Adam(GlobalNet.parameters(), lr=learning_rate)
        # CNS
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)
        # SharedMem
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Work info
        self.W = Work_info()

    # ==============================================================================================================
    # Part that sends control signals
    def send_action_append(self, pa, va):
        for p, v in zip(pa, va):
            self.para.append(p)
            self.val.append(v)

    def send_action(self, act):
        # Lists that store the parameters and values to be transmitted
        self.para = []
        self.val = []

        # Send the final parameters
        # NOTE: in this excerpt `act` is never mapped into (self.para, self.val),
        # so the control signal is sent with empty lists.
        self.CNS._send_control_signal(self.para, self.val)
    #
    # ==============================================================================================================
    # Build the input/output state
    def InitialStateSet(self):
        self.PhyPara = ['ZINST58', 'ZINST63']
        self.PhyState = {_:deque(maxlen=self.W.TimeLeg) for _ in self.PhyPara}

        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.COMPPara}

    def MakeStateSet(self):
        # Accumulate the latest values (one deque per parameter)
        [self.PhyState[_].append(self.PreProcessing(_, self.CNS.mem[_]['Val'])) for _ in self.PhyPara]
        [self.COMPState[_].append(self.PreProcessing(_, self.CNS.mem[_]['Val'])) for _ in self.COMPPara]

        # Convert to tensors (deques are wrapped in list() so torch.tensor accepts them)
        self.S_Py = torch.tensor([list(self.PhyState[key]) for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0], self.S_Py.shape[1])
        self.S_Comp = torch.tensor([list(self.COMPState[key]) for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0], self.S_Comp.shape[1])

        # Most recent single value per parameter
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        if para == 'ZINST58': val = round(val/1000, 7)      # pressurizer pressure
        if para == 'ZINST63': val = round(val/100, 7)       # pressurizer level
        return val

    # ==============================================================================================================

    def run(self):
        while True:
            self.CNS.init_cns(initial_nub=1)
            time.sleep(1)
            # self.CNS._send_malfunction_signal(12, 100100, 15)
            # time.sleep(1)

            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial
            done = False
            self.InitialStateSet()

            while not done:
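                # Warm-up: fill the state deques with TimeLeg frames before acting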
                for t in range(self.W.TimeLeg):
                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                for __ in range(15):
                    spy_lst, scomp_lst, a_lst, r_lst = [], [], [], []
                    # Sampling
                    for t in range(5):
                        PreVal = self.LocalNet.GetPredictActorOut(x_py=self.S_Py, x_comp=self.S_Comp)
                        PreVal = PreVal.tolist()[0]  # (1, 2) -> (2,)

                        spy_lst.append(self.S_Py.tolist()[0]) # (1, 2, 10) -list> (2, 10)
                        scomp_lst.append(self.S_Comp.tolist()[0]) # (1, 2, 10) -list> (2, 10)
                        a_lst.append(PreVal)    # (2, )


                        # Predicted next value: last observation plus the actor's output delta
                        old_before = {0: 0, 1: 0}
                        for nub_val in range(0, 2):
                            old_before[nub_val] = self.S_ONE_Py[nub_val] + PreVal[nub_val]

                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()

                        # Reward: +0.1 per channel when the prediction landed within
                        # +/-0.0001 of the new observation, else -0.1
                        r = {0: 0, 1: 0}

                        for nub_val in range(0, 2):
                            if self.S_ONE_Py[nub_val] - 0.0001 < old_before[nub_val] < self.S_ONE_Py[nub_val] + 0.0001:
                                r[nub_val] = 0.1
                            else:
                                r[nub_val] = -0.1
                        if r[0] == 0.1 and r[1] == 0.1:
                            t_r = 0.1
                        else:
                            t_r = -0.1
                        # t_r = r[0] + r[1]

                        r_lst.append(t_r)
                        print(self.CurrentIter, PreVal, self.S_ONE_Py[0] - 0.0001, old_before[0], self.S_ONE_Py[0], self.S_ONE_Py[0] + 0.0001, '|',
                              self.S_ONE_Py[1] - 0.0001, old_before[1], self.S_ONE_Py[1], self.S_ONE_Py[1] + 0.0001, '|', r[0], r[1], t_r)

                    # Train!
                    # print('Train!!!')
                    # Bootstrapped n-step returns used as TD targets
                    spy_fin = self.S_Py         # (1, 2, 10)
                    scomp_fin = self.S_Comp     # (1, 2, 10)

                    R = 0.0 if done else self.LocalNet.GetPredictCrticOut(spy_fin, scomp_fin).item()

                    td_target_lst = []
                    for reward in r_lst[::-1]:
                        R = gamma * R + reward
                        td_target_lst.append([R])
                    td_target_lst.reverse()

                    # Build training batches
                    spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                    scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)
                    a_batch = torch.tensor(a_lst, dtype=torch.float)
                    td_target = torch.tensor(td_target_lst)

                    value = self.LocalNet.GetPredictCrticOut(spy_batch, scomp_batch)
                    advantage = td_target - value

                    PreVal = self.LocalNet.GetPredictActorOut(x_py=spy_batch, x_comp=scomp_batch)

                    loss = -torch.log(PreVal) * advantage.detach() + \
                           nn.functional.smooth_l1_loss(self.LocalNet.GetPredictCrticOut(spy_batch, scomp_batch),
                                                        td_target.detach())
                    # Loss Display

                    self.optimizer.zero_grad()
                    loss.mean().backward()
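                    # A3C-style update: copy local gradients onto the shared global
                    # parameters, step the shared optimizer, then re-sync the local net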
                    for global_param, local_param in zip(self.GlobalNet.parameters(),
                                                         self.LocalNet.parameters()):
                        global_param._grad = local_param.grad
                    self.optimizer.step()
                    self.LocalNet.load_state_dict(self.GlobalNet.state_dict())

                break
            print('Done')
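
Both examples expect a shared global network and a manager-backed dict holding the 'Iter' counter. A minimal launcher sketch for Example 1, assuming placeholder IPs/ports and a worker count of 2 (PPOModel and Agent come from the code above; everything else here is illustrative):

# Hypothetical launcher for Example 1. The CNS/Remote endpoints and the
# worker count are placeholders, not values from the original project.
import torch.multiprocessing as mp

if __name__ == '__main__':
    GlobalNet = PPOModel(nub_para=2, time_leg=10)
    GlobalNet.share_memory()               # expose weights to all worker processes

    MEM = mp.Manager().dict({'Iter': 0})   # shared iteration counter

    workers = [Agent(GlobalNet, MEM,
                     CNS_ip='192.168.0.10', CNS_port=7000 + i,  # placeholder endpoints
                     Remote_ip='192.168.0.2', Remote_port=7100 + i)
               for i in range(2)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()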
Example 2
# As in Example 1; the project-local modules (CNS, NETBOX, NETOPTBOX,
# Work_info, TOOL) are assumed to be defined elsewhere in the repository.
import copy
import time
from collections import deque

import torch
import torch.nn as nn
import torch.multiprocessing as mp

class Agent(mp.Process):
    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        mp.Process.__init__(self)
        # Network info
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for _ in range(0, self.LocalNet.NubNET):
            self.LocalNet.NET[_].load_state_dict(self.GlobalNet.NET[_].state_dict())
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET, NET=self.GlobalNet.NET)
        # CNS
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)
        # SharedMem
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Work info
        self.W = Work_info()
        print(f'Make -- {self}')

    # ==============================================================================================================
    # Part that sends control signals
    def send_action_append(self, pa, va):
        for p, v in zip(pa, va):
            self.para.append(p)
            self.val.append(v)

    def send_action(self, act):
        # Lists that store the parameters and values to be transmitted
        self.para = []
        self.val = []

        # Send the final parameters
        # NOTE: in this excerpt `act` is never mapped into (self.para, self.val),
        # so the control signal is sent with empty lists.
        self.CNS._send_control_signal(self.para, self.val)

    #
    # ==============================================================================================================
    # Build the input/output state
    def InitialStateSet(self):
        self.PhyPara = ['ZINST58', 'ZINST63']
        self.PhyState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.PhyPara}

        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.COMPPara}

    def MakeStateSet(self):
        # Accumulate the latest values (one deque per parameter)
        [self.PhyState[_].append(self.PreProcessing(_, self.CNS.mem[_]['Val'])) for _ in self.PhyPara]
        [self.COMPState[_].append(self.PreProcessing(_, self.CNS.mem[_]['Val'])) for _ in self.COMPPara]

        # Convert to tensors (deques are wrapped in list() so torch.tensor accepts them)
        self.S_Py = torch.tensor([list(self.PhyState[key]) for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0], self.S_Py.shape[1])
        self.S_Comp = torch.tensor([list(self.COMPState[key]) for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0], self.S_Comp.shape[1])

        # Most recent single value per parameter
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        if para == 'ZINST58': val = round(val / 1000, 7)  # pressurizer pressure
        if para == 'ZINST63': val = round(val / 100, 7)  # pressurizer level
        return val

    # ==============================================================================================================

    def run(self):
        while True:
            self.CNS.init_cns(initial_nub=1)
            print('DONE initial')
            time.sleep(1)
            # self.CNS._send_malfunction_signal(12, 100100, 15)
            # time.sleep(1)

            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial
            done = False
            self.InitialStateSet()

            while not done:
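                # Warm-up: fill the state deques with TimeLeg frames before acting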
                for t in range(self.W.TimeLeg):
                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                for __ in range(15):
                    spy_lst, scomp_lst, a_lst, r_lst = [], [], [], []
                    a_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                    r_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                    # Sampling
                    for t in range(5):
                        TimeDB = {
                            'Netout': {},  # one scalar output per network: {0: .., 1: ..}
                        }
                        for nubNet in range(self.LocalNet.NubNET):
                            NetOut = self.LocalNet.NET[nubNet].GetPredictActorOut(
                                x_py=self.S_Py, x_comp=self.S_Comp)
                            NetOut = NetOut.tolist()[0][0]  # (1, 1) -> (1,) -> ()
                            TimeDB['Netout'][nubNet] = NetOut
                            a_dict[nubNet].append(NetOut)  # per-step history, matching the list init above

                        spy_lst.append(self.S_Py.tolist()[0])      # (1, 2, 10) -list-> (2, 10)
                        scomp_lst.append(self.S_Comp.tolist()[0])  # (1, 2, 10) -list-> (2, 10)

                        # Predicted next value: last observation plus each network's output delta
                        old_before = {0: 0, 1: 0}
                        for nubNet in range(self.LocalNet.NubNET):
                            old_before[nubNet] = self.S_ONE_Py[nubNet] + TimeDB['Netout'][nubNet]

                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()

                        # Reward: 1 per channel when the prediction landed within
                        # +/-0.0001 of the new observation, else 0
                        r = {0: 0, 1: 0}

                        for nub_val in range(0, 2):
                            if self.S_ONE_Py[nub_val] - 0.0001 < old_before[nub_val] < self.S_ONE_Py[nub_val] + 0.0001:
                                r[nub_val] = 1
                            else:
                                r[nub_val] = 0
                        # Combined reward (only printed below; training uses r_dict)
                        if r[0] == 1 and r[1] == 1:
                            t_r = 0.1
                        else:
                            t_r = -0.1
                        # t_r = r[0] + r[1]
                        # r_lst.append(t_r)

                        # Store rewards per network
                        for nubNet in range(self.LocalNet.NubNET):
                            r_dict[nubNet].append(r[nubNet])

                        print(self.CurrentIter, TimeDB['Netout'],
                              self.S_ONE_Py[0] - 0.0001, old_before[0],
                              self.S_ONE_Py[0], self.S_ONE_Py[0] + 0.0001, '|',
                              self.S_ONE_Py[1] - 0.0001, old_before[1],
                              self.S_ONE_Py[1], self.S_ONE_Py[1] + 0.0001, '|',
                              r[0], r[1], t_r)
                    # ==================================================================================================
                    # Train

                    gamma = 0.98
                    spy_fin = self.S_Py  # (1, 2, 10)
                    scomp_fin = self.S_Comp  # (1, 2, 10)
                    spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                    scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)

                    # Compute the advantage for each network
                    for nubNet in range(self.LocalNet.NubNET):
                        R = 0.0 if done else self.LocalNet.NET[nubNet].GetPredictCrticOut(
                            spy_fin, scomp_fin).item()
                        td_target_lst = []
                        for reward in r_dict[nubNet][::-1]:
                            R = gamma * R + reward
                            td_target_lst.append([R])
                        td_target_lst.reverse()

                        td_target = torch.tensor(td_target_lst)
                        value = self.LocalNet.NET[nubNet].GetPredictCrticOut(
                            spy_batch, scomp_batch)
                        advantage = td_target - value

                        PreVal = self.LocalNet.NET[nubNet].GetPredictActorOut(
                            spy_batch, scomp_batch)

                        loss = -torch.log(PreVal) * advantage.detach() + \
                               nn.functional.smooth_l1_loss(self.LocalNet.NET[nubNet].GetPredictCrticOut(spy_batch, scomp_batch),
                                                            td_target.detach())

                        self.LocalOPT.NETOPT[nubNet].zero_grad()
                        loss.mean().backward()
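                        # A3C-style update: copy local gradients onto the shared global
                        # network, step its optimizer, then re-sync the local copy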
                        for global_param, local_param in zip(
                                self.GlobalNet.NET[nubNet].parameters(),
                                self.LocalNet.NET[nubNet].parameters()):
                            global_param._grad = local_param.grad
                        self.LocalOPT.NETOPT[nubNet].step()
                        self.LocalNet.NET[nubNet].load_state_dict(
                            self.GlobalNet.NET[nubNet].state_dict())

                        # TOOL.ALLP(advantage.mean())
                        print(self.CurrentIter, 'adv: ',
                              advantage.mean().item(), 'loss: ',
                              loss.mean().item())

                print('DONE EP')
                break
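
The training step in both examples is the classic A3C recipe: bootstrapped n-step TD targets from the sampled rewards, an advantage against the local critic, and local gradients applied to the shared global network before re-syncing. A self-contained sketch of the target computation, assuming only a reward list and a critic bootstrap value (the function name is illustrative):

# Minimal sketch of the bootstrapped n-step targets built inside run().
# `rewards` corresponds to r_lst (Example 1) or r_dict[nubNet] (Example 2);
# `bootstrap` is the critic's value of the final state, or 0.0 when done.
import torch

def n_step_td_targets(rewards, bootstrap, gamma=0.98):
    R = bootstrap
    targets = []
    for reward in reversed(rewards):   # walk the trajectory backwards
        R = gamma * R + reward         # discounted return from this step onward
        targets.append([R])
    targets.reverse()                  # restore chronological order
    return torch.tensor(targets)

# Five steps of reward 0.1 with a bootstrap of 0.5 reproduces the
# td_target tensor shape (5, 1) used for the critic loss.
print(n_step_td_targets([0.1] * 5, 0.5))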