def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
    """Build the worker process state: networks, CNS link, memory and plots.

    GlobalNet: container exposing ``NET`` (indexable networks); the weights
        of each entry are copied into a freshly built local ``NETBOX``.
    MEM: memory object kept by reference in ``self.mem`` and deep-copied
        into ``self.LocalMem``.
    CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded unchanged to ``CNS``.
    """
    mp.Process.__init__(self)

    # Network info: every local network starts from the matching GlobalNet weights.
    self.GlobalNet = GlobalNet
    local_box = NETBOX()
    self.LocalNet = local_box
    for net_idx in range(local_box.NubNET):
        source_weights = GlobalNet.NET[net_idx].state_dict()
        local_box.NET[net_idx].load_state_dict(source_weights)
    # The optimizer box is built over the GlobalNet networks, not the local copies.
    self.LocalOPT = NETOPTBOX(NubNET=local_box.NubNET, NET=GlobalNet.NET)

    # CNS
    self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)

    # SharedMem: keep the caller's object and a private deep copy.
    self.mem = MEM
    self.LocalMem = copy.deepcopy(MEM)

    # Work info
    self.W = Work_info()

    # GP Setting: one 13x13 figure, then one subplot, per plotted parameter.
    plot_keys = ("ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145")
    figures = {}
    for key in plot_keys:
        figures[key] = plt.figure(figsize=(13, 13))
    self.fig_dict = figures
    axes = {}
    for key in plot_keys:
        axes[key] = figures[key].add_subplot()
    self.ax_dict = axes

    print(f'Make -- {self}')
Exemple #2
0
 def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
     """Build the worker process state: work info, CNS link and memory.

     GlobalNet is accepted but not used by this constructor.
     MEM is kept by reference in ``self.mem`` and deep-copied into
     ``self.LocalMem``. The four address arguments are forwarded
     unchanged to ``CNS``.
     """
     mp.Process.__init__(self)

     # Work info comes first: its TimeLeg is handed to CNS as Max_len.
     work_info = Work_info()
     self.W = work_info

     # CNS
     self.CNS = cns_link = CNS(self.name, CNS_ip, CNS_port,
                               Remote_ip, Remote_port,
                               Max_len=work_info.TimeLeg)
     cns_link.LoggerPath = 'DB'

     # SharedMem
     self.mem = MEM
     self.LocalMem = copy.deepcopy(MEM)

     print(f'Make -- {self}')
Exemple #3
0
 def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
     """Build the worker process state for the PPO variant.

     GlobalNet: model whose ``state_dict()`` seeds the local ``PPOModel``
         and whose ``parameters()`` are given to the Adam optimizer.
     MEM: kept by reference in ``self.mem`` and deep-copied into
         ``self.LocalMem``.
     CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded unchanged to ``CNS``.
     """
     mp.Process.__init__(self)

     # Network info: the local model starts from the GlobalNet weights.
     self.GlobalNet = GlobalNet
     self.LocalNet = local_model = PPOModel(nub_para=2, time_leg=10)
     local_model.load_state_dict(GlobalNet.state_dict())
     # The optimizer is built over the GlobalNet parameters, not the local copy.
     self.optimizer = optim.Adam(GlobalNet.parameters(), lr=learning_rate)

     # CNS
     self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)

     # SharedMem
     self.mem = MEM
     self.LocalMem = copy.deepcopy(MEM)

     # Work info
     self.W = Work_info()
Exemple #4
0
 def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
     """Build the worker process state: networks, CNS link and memory.

     GlobalNet: container exposing ``NET`` (indexable networks); the
         weights of each entry are copied into a fresh local ``NETBOX``.
     MEM: kept by reference in ``self.mem`` and deep-copied into
         ``self.LocalMem``.
     CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded unchanged to ``CNS``.
     """
     mp.Process.__init__(self)

     # Network info: every local network starts from the matching GlobalNet weights.
     self.GlobalNet = GlobalNet
     local_box = NETBOX()
     self.LocalNet = local_box
     for net_idx in range(local_box.NubNET):
         source_weights = GlobalNet.NET[net_idx].state_dict()
         local_box.NET[net_idx].load_state_dict(source_weights)
     # The optimizer box is built over the GlobalNet networks, not the local copies.
     self.LocalOPT = NETOPTBOX(NubNET=local_box.NubNET, NET=GlobalNet.NET)

     # CNS
     self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)

     # SharedMem
     self.mem = MEM
     self.LocalMem = copy.deepcopy(MEM)

     # Work info
     self.W = Work_info()

     print(f'Make -- {self}')
Exemple #5
0
    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        """Build the worker process state and sanity-check the parameter table.

        GlobalNet: container exposing ``NET`` (indexable networks); the
            weights of each entry are copied into a fresh local ``NETBOX``.
        MEM: mapping-like object kept by reference in ``self.mem`` and
            deep-copied into ``self.LocalMem``; its ``'Iter'`` entry is read.
        CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded unchanged to ``CNS``.

        When ``MEM['Iter'] == 0`` the constructor prints a warning for every
        ``PARA_info`` name with no ``v<name>`` key in ``self.CNS.mem`` and for
        every ``v``-prefixed key in ``self.CNS.mem`` missing from ``PARA_info``.
        """
        mp.Process.__init__(self)
        # Network info: every local network starts from the matching GlobalNet weights.
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for net_idx in range(self.LocalNet.NubNET):
            self.LocalNet.NET[net_idx].load_state_dict(self.GlobalNet.NET[net_idx].state_dict())
        # The optimizer box is built over the GlobalNet networks, not the local copies.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET, NET=self.GlobalNet.NET)
        # Work info (created before CNS because its TimeLeg is passed as Max_len)
        self.W = Work_info()
        # CNS
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port, Max_len=self.W.TimeLeg)
        self.CNS.LoggerPath = 'V6_2'
        # SharedMem
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Parameters in use
        self.PARA_info = {
            'ZINST58': {'Div': 1000, 'Round': 5, 'Type': 'P'},
            'ZINST63': {'Div': 100, 'Round': 4, 'Type': 'P'},
            'ZVCT': {'Div': 100, 'Round': 4, 'Type': 'P'},
            'BFV122': {'Div': 1, 'Round': 2, 'Type': 'F'},
            'BPV145': {'Div': 1, 'Round': 2, 'Type': 'F'},
            'BPV122C': {'Div': 2, 'Round': 2, 'Type': 'C'},
            'BPV145C': {'Div': 2, 'Round': 2, 'Type': 'C'},
        }
        ## Check that the parameters in use are listed in db_add.txt
        if self.mem['Iter'] == 0:
            # Every parameter in use should have a 'v<name>' entry in CNS memory.
            for para_name in self.PARA_info:
                if f'v{para_name}' not in self.CNS.mem.keys():
                    # Message: "v<name> is missing; add it to db_add.txt"
                    print(f'v{para_name} 값이 없음 db_add.txt에 추가할 것')
            # Conversely, report 'v'-prefixed CNS entries that are not used here.
            for mem_key in self.CNS.mem.keys():
                # startswith() (guarded by isinstance) cannot raise on an empty
                # or non-string key, unlike indexing the first character.
                if isinstance(mem_key, str) and mem_key.startswith('v'):
                    if mem_key[1:] not in self.PARA_info:
                        # Message: "<key> is missing; add it to self.PARA_info"
                        print(f'{mem_key} 값이 없음 self.PARA_info에 추가할 것')
        ## -----------------------------------------------

        # GP Setting
        # self.fig_dict = {i_: plt.figure(figsize=(13, 13)) for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145", "BFV122_CONT", "BPV145_CONT"]}
        # self.ax_dict = {i_: self.fig_dict[i_].add_subplot() for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145", "BFV122_CONT", "BPV145_CONT"]}
        print(f'Make -- {self}')
class Agent(mp.Process):
    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        """Build the agent process state: networks, CNS link, memory and plots.

        GlobalNet: container exposing ``NET`` (indexable networks); the
            weights of each entry are copied into a fresh local ``NETBOX``.
        MEM: kept by reference in ``self.mem`` and deep-copied into
            ``self.LocalMem``.
        CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded unchanged to ``CNS``.
        """
        mp.Process.__init__(self)

        # Network info: every local network starts from the matching GlobalNet weights.
        self.GlobalNet = GlobalNet
        local_box = NETBOX()
        self.LocalNet = local_box
        for net_idx in range(local_box.NubNET):
            source_weights = GlobalNet.NET[net_idx].state_dict()
            local_box.NET[net_idx].load_state_dict(source_weights)
        # The optimizer box is built over the GlobalNet networks, not the local copies.
        self.LocalOPT = NETOPTBOX(NubNET=local_box.NubNET, NET=GlobalNet.NET)

        # CNS
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)

        # SharedMem
        self.mem = MEM
        self.LocalMem = copy.deepcopy(MEM)

        # Work info
        self.W = Work_info()

        # GP Setting: one 13x13 figure, then one subplot, per plotted parameter.
        plot_keys = ("ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145")
        figures = {}
        for key in plot_keys:
            figures[key] = plt.figure(figsize=(13, 13))
        self.fig_dict = figures
        axes = {}
        for key in plot_keys:
            axes[key] = figures[key].add_subplot()
        self.ax_dict = axes

        print(f'Make -- {self}')

    # ==============================================================================================================
    # Control-signal sending part
    def send_action_append(self, pa, va):
        """Queue entries for sending: extend ``self.para`` / ``self.val``.

        For every position in ``pa`` the entry is appended to ``self.para``
        and the entry of ``va`` at the same position to ``self.val``. Both
        lists must already exist on the instance. Extra entries in ``va``
        are ignored; a ``va`` shorter than ``pa`` raises ``IndexError``.
        """
        for position, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[position])

    def send_action(self, act):
        """Reset the outgoing lists and hand them to the CNS control sender.

        ``act`` is not used by this implementation: ``self.para`` and
        ``self.val`` are replaced by fresh empty lists, and those same list
        objects are passed to ``self.CNS._send_control_signal``.
        """
        # Lists holding the variables and values to be transmitted
        self.para, self.val = [], []

        # Final parameter transmission
        sender = self.CNS._send_control_signal
        sender(self.para, self.val)

    #
    # ==============================================================================================================
    # Input/output value generation
    def InitialStateSet(self):
        """Create empty, bounded history buffers for every tracked parameter.

        Sets ``self.PhyPara`` / ``self.COMPPara`` (parameter name lists) and
        ``self.PhyState`` / ``self.COMPState`` (name -> empty ``deque`` whose
        ``maxlen`` is ``self.W.TimeLeg``).
        """
        self.PhyPara = ['ZINST58', 'ZINST63', 'ZVCT']
        phy_buffers = {}
        for name in self.PhyPara:
            phy_buffers[name] = deque(maxlen=self.W.TimeLeg)
        self.PhyState = phy_buffers

        self.COMPPara = ['BFV122', 'BPV145']
        comp_buffers = {}
        for name in self.COMPPara:
            comp_buffers[name] = deque(maxlen=self.W.TimeLeg)
        self.COMPState = comp_buffers

    def MakeStateSet(self):
        """Push the current CNS values into the history buffers and rebuild tensors.

        For every name in ``self.PhyPara`` and then ``self.COMPPara`` the
        value ``self.CNS.mem[name]['Val']`` is passed through
        ``self.PreProcessing`` and appended to the matching buffer. Then:

        * ``self.S_Py`` / ``self.S_Comp``: tensors built from the buffers and
          reshaped to ``(1, number_of_parameters, buffer_length)``.
        * ``self.S_ONE_Py`` / ``self.S_ONE_Comp``: lists holding only the most
          recent entry of each buffer.
        """
        # Accumulate the newest values
        for name in self.PhyPara:
            raw = self.CNS.mem[name]['Val']
            self.PhyState[name].append(self.PreProcessing(name, raw))
        for name in self.COMPPara:
            raw = self.CNS.mem[name]['Val']
            self.COMPState[name].append(self.PreProcessing(name, raw))

        # Convert to tensors with a leading axis of size 1
        phy_rows = [self.PhyState[name] for name in self.PhyPara]
        phy_tensor = torch.tensor(phy_rows)
        self.S_Py = phy_tensor.reshape(1, phy_tensor.shape[0],
                                       phy_tensor.shape[1])
        comp_rows = [self.COMPState[name] for name in self.COMPPara]
        comp_tensor = torch.tensor(comp_rows)
        self.S_Comp = comp_tensor.reshape(1, comp_tensor.shape[0],
                                          comp_tensor.shape[1])

        # Latest single value per parameter, as plain lists
        latest_phy = []
        for name in self.PhyPara:
            latest_phy.append(self.PhyState[name][-1])
        self.S_ONE_Py = latest_phy
        latest_comp = []
        for name in self.COMPPara:
            latest_comp.append(self.COMPState[name][-1])
        self.S_ONE_Comp = latest_comp

    def PreProcessing(self, para, val):
        """Scale and round a raw value according to its parameter name.

        Known names are divided by a fixed factor (where one applies) and
        rounded to a fixed number of digits; any other name returns ``val``
        unchanged.
        """
        # name -> (divisor, or None when the value is only rounded; digits kept)
        rules = {
            'ZINST58': (1000, 5),  # pressurizer pressure
            'ZINST63': (100, 4),   # pressurizer level
            'ZVCT': (100, 4),      # VCT level
            'BFV122': (None, 2),   # BFV122 position
            'BPV145': (None, 2),   # BPV145 position
        }
        rule = rules.get(para)
        if rule is None:
            return val
        divisor, digits = rule
        scaled = val if divisor is None else val / divisor
        return round(scaled, digits)

    # ==============================================================================================================

    def run(self):
        while True:
            size, maltime = ran.randint(100, 600), ran.randint(30, 100) * 5
            self.CNS.reset(initial_nub=1,
                           mal=True,
                           mal_case=36,
                           mal_opt=size,
                           mal_time=maltime)
            print(f'DONE initial {size}, {maltime}')

            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            # 진단 모듈 Tester !
            if self.CurrentIter != 0 and self.CurrentIter % 30 == 0:
                print(self.CurrentIter, 'Yes Test')
                self.PrognosticMode = True
            else:
                print(self.CurrentIter, 'No Test')
                self.PrognosticMode = False

            # Initial
            done = False
            self.InitialStateSet()

            # GP 이전 데이터 Clear
            [
                self.ax_dict[i_].clear()
                for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"]
            ]

            while not done:
                fulltime = 15
                t_max = 5  # total iteration = fulltime * t_max
                tun = [1000, 100, 100, 1, 1]
                ro = [2, 2, 2, 2, 2]
                ProgRecodBox = {
                    "ZINST58": [],
                    "ZINST63": [],
                    "ZVCT": [],
                    "BFV122": [],
                    "BPV145": []
                }  # recode 초기화

                if self.PrognosticMode:
                    # Test Mode
                    SOFTMODE = False
                    for t in range(self.W.TimeLeg):
                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()
                        [
                            ProgRecodBox[i_].append(
                                round(self.CNS.mem[i_]['Val'], r_) / t_)
                            for i_, t_, r_ in zip(ProgRecodBox.keys(), tun, ro)
                        ]

                    if not SOFTMODE:
                        for __ in range(fulltime * t_max):  # total iteration
                            if __ != 0 and __ % 10 == 0:  # 10Step 마다 예지
                                # copy self.S_Py, self.S_Comp
                                copySPy, copySComp = self.S_Py, self.S_Comp
                                copyRecodBox = {
                                    "ZINST58": [],
                                    "ZINST63": [],
                                    "ZVCT": [],
                                    "BFV122": [],
                                    "BPV145": []
                                }  # recode 초기화
                                # TOOL.ALLP(copyRecodBox["ZINST58"], "CopySPy")
                                for PredictTime in range(
                                        __, fulltime *
                                        t_max):  # 시간이 갈수록 예지하는 시간이 줄어듬.
                                    # 예지 시작
                                    save_ragular_para = {
                                        _: 0
                                        for _ in range(self.LocalNet.NubNET)
                                    }
                                    for nubNet in range(
                                            0, self.LocalNet.NubNET):
                                        NetOut = self.LocalNet.NET[
                                            nubNet].GetPredictActorOut(
                                                x_py=copySPy, x_comp=copySComp)
                                        NetOut = NetOut.view(
                                            -1)  # (1, 2) -> (2, )
                                        TOOL.ALLP(NetOut, 'Net_out')

                                        if nubNet < 6:
                                            act_ = NetOut.argmax().item(
                                            )  # 행열에서 최대값을 추출 후 값 반환
                                            save_ragular_para[nubNet] = (
                                                act_ - 100
                                            ) / 100  # act_ 값이 값의 증감으로 변경
                                        else:  # 6, 7
                                            save_ragular_para[
                                                nubNet] = NetOut.data.numpy()
                                            TOOL.ALLP(
                                                save_ragular_para[nubNet],
                                                f'save_reagular_para{nubNet}')
                                    TOOL.ALLP(save_ragular_para,
                                              "save_ragular_para")

                                    # copySPy, copySComp에 값 추가
                                    # copySpy
                                    copySPyLastVal = copySPy[:, :,
                                                             -1:]  # [1, 3, 10] -> [1, 3, 1] 마지막 변수 가져옴.

                                    # add_val = tensor([[
                                    #     [round(save_ragular_para[0] / 1000, 5)],
                                    #     [round(save_ragular_para[1] / 100, 4)],
                                    #     [round(save_ragular_para[2] / 100, 4)]
                                    # ]])
                                    add_val = tensor(
                                        [[[save_ragular_para[6][0]],
                                          [save_ragular_para[6][1]],
                                          [save_ragular_para[6][2]]]],
                                        dtype=torch.float)
                                    TOOL.ALLP(copySPyLastVal, "copySPyLastVal")
                                    TOOL.ALLP(add_val, "add_val")
                                    # copySPyLastVal = copySPyLastVal + add_val  # 마지막 변수에 예측된 값을 더해줌.
                                    copySPyLastVal = add_val  # 마지막 변수에 예측된 값을 더해줌.

                                    copySPy = torch.cat(
                                        (copySPy, copySPyLastVal),
                                        dim=2)  # 본래 텐서에 값을 더함.
                                    # 반올림
                                    TOOL.ALLP(copySPy.data.numpy(), "COPYSPY")
                                    copySPy = np.around(copySPy.data.numpy(),
                                                        decimals=5)
                                    TOOL.ALLP(copySPy, "COPYSPY_Round")
                                    copySPy = torch.tensor(copySPy)
                                    copySPy = copySPy[:, :, 1:]  # 맨뒤의 값을 자름.
                                    # TOOL.ALLP(copySPy.data.numpy(), "copySPy Next")

                                    # copySComp
                                    copySCompLastVal = copySComp[:, :,
                                                                 -1:]  # [1, 3, 10] -> [1, 3, 1] 마지막 변수 가져옴.
                                    # TOOL.ALLP(copySCompLastVal.data.numpy(), "COPYSCOMP")
                                    # copySpy와 다르게 copy SComp는 이전의 제어 값을 그대로 사용함.
                                    # TODO
                                    # 자기자신 자체
                                    copySCompLastVal = tensor(
                                        [[[round(save_ragular_para[3], 2)],
                                          [round(save_ragular_para[4], 2)]]])

                                    copySComp = torch.cat(
                                        (copySComp, copySCompLastVal),
                                        dim=2)  # 본래 텐서에 값을 더함.
                                    # 반올림
                                    copySComp = np.around(
                                        copySComp.data.numpy(), decimals=3)
                                    copySComp = torch.tensor(copySComp)
                                    copySComp = copySComp[:, :,
                                                          1:]  # 맨뒤의 값을 자름.
                                    # 결과값 Recode
                                    copyRecodBox["ZINST58"].append(
                                        copySPyLastVal[0, 0, 0].item())
                                    copyRecodBox["ZINST63"].append(
                                        copySPyLastVal[0, 1, 0].item())
                                    copyRecodBox["ZVCT"].append(
                                        copySPyLastVal[0, 2, 0].item())

                                    copyRecodBox["BFV122"].append(
                                        copySComp[0, 0, 0].item())
                                    copyRecodBox["BPV145"].append(
                                        copySComp[0, 1, 0].item())
                                # 예지 종료 결과값 Recode 그래픽화
                                [
                                    self.ax_dict[i_].plot(
                                        ProgRecodBox[i_] + copyRecodBox[i_],
                                        label=f"{i_}_{__}") for i_ in [
                                            "ZINST58", "ZINST63", "ZVCT",
                                            "BFV122", "BPV145"
                                        ]
                                ]

                            # plt.show()
                            # CNS + 1 Step
                            self.CNS.run_freeze_CNS()
                            self.MakeStateSet()
                            [
                                ProgRecodBox[i_].append(
                                    round(self.CNS.mem[i_]['Val'], r_) / t_)
                                for i_, t_, r_ in zip(ProgRecodBox.keys(), tun,
                                                      ro)
                            ]

                    else:

                        for __ in range(fulltime * t_max):  # total iteration
                            if __ != 0 and __ % 10 == 0:  # 10Step 마다 예지
                                # copy self.S_Py, self.S_Comp
                                copySPy, copySComp = self.S_Py, self.S_Comp
                                copyRecodBox = {
                                    "ZINST58": [],
                                    "ZINST63": [],
                                    "ZVCT": [],
                                    "BFV122": [],
                                    "BPV145": []
                                }  # recode 초기화
                                # TOOL.ALLP(copyRecodBox["ZINST58"], "CopySPy")
                                for PredictTime in range(
                                        __, fulltime *
                                        t_max):  # 시간이 갈수록 예지하는 시간이 줄어듬.
                                    # 예지 시작
                                    save_ragular_para = {
                                        _: 0
                                        for _ in range(self.LocalNet.NubNET)
                                    }
                                    for nubNet in range(
                                            0, self.LocalNet.NubNET):
                                        NetOut = self.LocalNet.NET[
                                            nubNet].GetPredictActorOut(
                                                x_py=copySPy, x_comp=copySComp)
                                        NetOut = NetOut.view(
                                            -1)  # (1, 2) -> (2, )
                                        act_ = NetOut.argmax().item(
                                        )  # 행열에서 최대값을 추출 후 값 반환
                                        save_ragular_para[nubNet] = (
                                            act_ -
                                            100) / 100  # act_ 값이 값의 증감으로 변경
                                    TOOL.ALLP(save_ragular_para,
                                              "save_ragular_para")

                                    # copySPy, copySComp에 값 추가
                                    # copySpy
                                    copySPyLastVal = copySPy[:, :,
                                                             -1:]  # [1, 3, 10] -> [1, 3, 1] 마지막 변수 가져옴.

                                    add_val = tensor([[[
                                        round(save_ragular_para[0] / 1000, 5)
                                    ], [
                                        round(save_ragular_para[1] / 100, 4)
                                    ], [round(save_ragular_para[2] / 100,
                                              4)]]])
                                    TOOL.ALLP(copySPyLastVal, "copySPyLastVal")
                                    TOOL.ALLP(add_val, "add_val")
                                    copySPyLastVal = copySPyLastVal + add_val  # 마지막 변수에 예측된 값을 더해줌.

                                    copySPy = torch.cat(
                                        (copySPy, copySPyLastVal),
                                        dim=2)  # 본래 텐서에 값을 더함.
                                    # 반올림
                                    TOOL.ALLP(copySPy.data.numpy(), "COPYSPY")
                                    copySPy = np.around(copySPy.data.numpy(),
                                                        decimals=5)
                                    TOOL.ALLP(copySPy, "COPYSPY_Round")
                                    copySPy = torch.tensor(copySPy)
                                    copySPy = copySPy[:, :, 1:]  # 맨뒤의 값을 자름.
                                    # TOOL.ALLP(copySPy.data.numpy(), "copySPy Next")

                                    # copySComp
                                    copySCompLastVal = copySComp[:, :,
                                                                 -1:]  # [1, 3, 10] -> [1, 3, 1] 마지막 변수 가져옴.
                                    # TOOL.ALLP(copySCompLastVal.data.numpy(), "COPYSCOMP")
                                    # copySpy와 다르게 copy SComp는 이전의 제어 값을 그대로 사용함.
                                    #TODO
                                    # 자기자신 자체
                                    copySCompLastVal = tensor(
                                        [[[round(save_ragular_para[3], 2)],
                                          [round(save_ragular_para[4], 2)]]])

                                    copySComp = torch.cat(
                                        (copySComp, copySCompLastVal),
                                        dim=2)  # 본래 텐서에 값을 더함.
                                    # 반올림
                                    copySComp = np.around(
                                        copySComp.data.numpy(), decimals=3)
                                    copySComp = torch.tensor(copySComp)
                                    copySComp = copySComp[:, :,
                                                          1:]  # 맨뒤의 값을 자름.
                                    # 결과값 Recode
                                    copyRecodBox["ZINST58"].append(
                                        copySPyLastVal[0, 0, 0].item())
                                    copyRecodBox["ZINST63"].append(
                                        copySPyLastVal[0, 1, 0].item())
                                    copyRecodBox["ZVCT"].append(
                                        copySPyLastVal[0, 2, 0].item())

                                    copyRecodBox["BFV122"].append(
                                        copySComp[0, 0, 0].item())
                                    copyRecodBox["BPV145"].append(
                                        copySComp[0, 1, 0].item())
                                # 예지 종료 결과값 Recode 그래픽화
                                [
                                    self.ax_dict[i_].plot(
                                        ProgRecodBox[i_] + copyRecodBox[i_],
                                        label=f"{i_}_{__}") for i_ in [
                                            "ZINST58", "ZINST63", "ZVCT",
                                            "BFV122", "BPV145"
                                        ]
                                ]

                            # plt.show()
                            # CNS + 1 Step
                            self.CNS.run_freeze_CNS()
                            self.MakeStateSet()
                            [
                                ProgRecodBox[i_].append(
                                    round(self.CNS.mem[i_]['Val'], r_) / t_)
                                for i_, t_, r_ in zip(ProgRecodBox.keys(), tun,
                                                      ro)
                            ]

                    # END Test Mode CODE
                    [
                        self.ax_dict[i_].grid() for i_ in
                        ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"]
                    ]
                    [
                        self.ax_dict[i_].legend() for i_ in
                        ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"]
                    ]
                    [
                        self.fig_dict[i_].savefig(
                            f"{self.CurrentIter}_{i_}.png") for i_ in
                        ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"]
                    ]
                    print('END TEST')

                else:
                    # Train Mode
                    for t in range(self.W.TimeLeg):
                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()

                    for __ in range(fulltime):
                        spy_lst, scomp_lst, a_lst, r_lst = [], [], [], []
                        a_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                        a_now = {_: 0 for _ in range(self.LocalNet.NubNET)}
                        a_prob = {_: [] for _ in range(self.LocalNet.NubNET)}
                        r_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                        done_dict = {
                            _: []
                            for _ in range(self.LocalNet.NubNET)
                        }

                        y_predict = {
                            _: []
                            for _ in range(self.LocalNet.NubNET)
                        }
                        y_answer = {_: [] for _ in range(self.LocalNet.NubNET)}
                        # Sampling
                        for t in range(t_max):
                            NetOut_dict = {
                                _: 0
                                for _ in range(self.LocalNet.NubNET)
                            }
                            for nubNet in range(0, self.LocalNet.NubNET):
                                # TOOL.ALLP(self.S_Py, 'S_Py')
                                # TOOL.ALLP(self.S_Comp, 'S_Comp')
                                NetOut = self.LocalNet.NET[
                                    nubNet].GetPredictActorOut(
                                        x_py=self.S_Py, x_comp=self.S_Comp)
                                NetOut = NetOut.view(-1)  # (1, 2) -> (2, )
                                # TOOL.ALLP(NetOut, 'Netout before Categorical')
                                if nubNet < 6:
                                    act = torch.distributions.Categorical(
                                        NetOut).sample().item(
                                        )  # 2개 중 샘플링해서 값 int 반환
                                    # TOOL.ALLP(act, 'act')
                                    NetOut = NetOut.tolist()[act]
                                    # TOOL.ALLP(NetOut, f'NetOut{nubNet}')
                                    NetOut_dict[nubNet] = NetOut
                                    # TOOL.ALLP(NetOut_dict, f'NetOut{nubNet}')

                                    a_now[nubNet] = act
                                    a_dict[nubNet].append([act])
                                    a_prob[nubNet].append([NetOut])
                                else:
                                    y_predict[nubNet].append(
                                        NetOut.data.numpy())
                                    # TOOL.ALLP(y_predict[nubNet], 'y_predict')

                            spy_lst.append(self.S_Py.tolist()
                                           [0])  # (1, 2, 10) -list> (2, 10)
                            scomp_lst.append(self.S_Comp.tolist()
                                             [0])  # (1, 2, 10) -list> (2, 10)

                            # old val to compare the new val
                            ComparedPara = [
                                "ZINST58", "ZINST63", "ZVCT", "BFV122",
                                "BPV145"
                            ]
                            ComparedParaRound = [2, 2, 2, 2, 2]
                            self.old_cns = {
                                para: round(self.CNS.mem[para]['Val'], pr)
                                for para, pr in zip(ComparedPara,
                                                    ComparedParaRound)
                            }
                            # TOOL.ALLP(self.old_cns, "old_CNS")

                            # CNS + 1 Step
                            self.CNS.run_freeze_CNS()
                            self.MakeStateSet()
                            self.new_cns = {
                                para: round(self.CNS.mem[para]['Val'], pr)
                                for para, pr in zip(ComparedPara,
                                                    ComparedParaRound)
                            }

                            y_answer_one = self.S_Py[:, :, -1:].data.reshape(3)
                            # TOOL.ALLP(y_answer_one, "Answer_one")
                            y_answer[6].append(y_answer_one.numpy())
                            y_answer_one = self.S_Comp[:, :,
                                                       -1:].data.reshape(2)
                            y_answer[7].append(y_answer_one.numpy())
                            # TOOL.ALLP(y_answer, "y_answer")

                            # 보상 및 종료조건 계산
                            r = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
                            pa = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
                            for nubNet in range(0, 6):  # 보상 네트워크별로 계산 및 저장
                                # for nubNet in range(0, self.LocalNet.NubNET):      # 보상 네트워크별로 계산 및 저장
                                if nubNet == 0:
                                    if self.CNS.mem['KCNTOMS']['Val'] < maltime:
                                        if a_now[nubNet] == 1:  # Malfunction
                                            r[nubNet] = -1
                                        else:
                                            r[nubNet] = 1
                                    else:
                                        if a_now[nubNet] == 1:  # Malfunction
                                            r[nubNet] = 1
                                        else:
                                            r[nubNet] = -1
                                else:
                                    predict_a = round(
                                        (a_now[nubNet] - 100) / 100, 2)
                                    pa[nubNet] = predict_a
                                    # TODO
                                    #  변수 타입에 따라서 로직 변화함.
                                    want_para_reward = {
                                        1: "ZVCT",
                                        2: "ZINST58",
                                        3: "ZINST63",
                                        4: "BFV122",
                                        5: "BPV145"
                                    }
                                    if nubNet < 4:
                                        if self.new_cns[want_para_reward[
                                                nubNet]] == self.old_cns[
                                                    want_para_reward[
                                                        nubNet]] + predict_a:
                                            r[nubNet] = 1
                                        else:
                                            DeZVCT = self.new_cns[
                                                want_para_reward[nubNet]] - (
                                                    self.
                                                    old_cns[want_para_reward[
                                                        nubNet]] + predict_a)
                                            if DeZVCT < 0:  # 예측된 값이 더 크다.
                                                # 12.2 - 12.1 -> 0.1
                                                # r[nubNet] = 1 - ((self.old_cns[want_para_reward[nubNet]] + predict_a) - self.new_cns[want_para_reward[nubNet]])
                                                r[nubNet] = -((
                                                    self.
                                                    old_cns[want_para_reward[
                                                        nubNet]] + predict_a
                                                ) - self.new_cns[
                                                    want_para_reward[nubNet]])
                                            else:  # 예측된 값이 더 작다.
                                                # 12.2 - 12.1 -> 0.3
                                                # r[nubNet] = 1 - ( - (self.old_cns[want_para_reward[nubNet]] + predict_a) + self.new_cns[want_para_reward[nubNet]])
                                                r[nubNet] = -(-(
                                                    self.
                                                    old_cns[want_para_reward[
                                                        nubNet]] + predict_a
                                                ) + self.new_cns[
                                                    want_para_reward[nubNet]])
                                        r[nubNet] = round(
                                            r[nubNet],
                                            3)  # 0.100 나와서 2자리에서 반올림.
                                    else:
                                        if self.new_cns[want_para_reward[
                                                nubNet]] == predict_a:
                                            r[nubNet] = 1
                                        else:
                                            DeZVCT = self.new_cns[
                                                want_para_reward[
                                                    nubNet]] - predict_a
                                            if DeZVCT < 0:  # 예측된 값이 더 크다.
                                                r[nubNet] = -(
                                                    predict_a - self.new_cns[
                                                        want_para_reward[
                                                            nubNet]])
                                            else:
                                                r[nubNet] = -(
                                                    -predict_a + self.new_cns[
                                                        want_para_reward[
                                                            nubNet]])
                                        r[nubNet] = round(
                                            r[nubNet],
                                            3)  # 0.100 나와서 3자리에서 반올림.

                                r_dict[nubNet].append(r[nubNet])
                                # TOOL.ALLP(r[nubNet], "r_nubNet")
                                # TOOL.ALLP(pa[nubNet], "pa_nubNet")
                                # 종료 조건 계산
                                if __ == 14 and t == t_max - 1:
                                    done_dict[nubNet].append(0)
                                    done = True
                                else:
                                    done_dict[nubNet].append(1)

                            def dp_want_val(val, name):
                                return f"{name}: {self.CNS.mem[val]['Val']:4.4f}"

                            print(
                                self.CurrentIter,
                                f"{r[0]:6}|{r[1]:6}|{r[2]:6}|{r[3]:6}|{r[4]:6}|{r[5]:6}|",
                                f'{NetOut_dict[0]:0.4f}',
                                f'{NetOut_dict[1]:0.4f}',
                                f'{NetOut_dict[2]:0.4f}',
                                f'{NetOut_dict[3]:0.4f}',
                                f'{NetOut_dict[4]:0.4f}',
                                f'{NetOut_dict[5]:0.4f}',
                                f"TIME: {self.CNS.mem['KCNTOMS']['Val']:5}",
                                # dp_want_val('PVCT', 'VCT pressure'),
                                f"VCT Level: {self.new_cns['ZVCT']}",
                                f"{self.old_cns['ZVCT'] + pa[1]:5.2f} + {pa[1]:5.2f}",
                                f"PZR pre: {self.new_cns['ZINST58']}",
                                f"{self.old_cns['ZINST58'] + pa[2]:5.2f} + {pa[2]:5.2f}",
                                f"PZR Level: {self.new_cns['ZINST63']}",
                                f"{self.old_cns['ZINST63'] + pa[3]:5.2f} + {pa[3]:5.2f}",
                                f"BFV122: {self.new_cns['BFV122']}",
                                f"{self.new_cns['BFV122'] + pa[4]:5.2f} + {pa[4]:5.2f}",
                                f"BFV122: {self.new_cns['BPV145']}",
                                f"{self.new_cns['BPV145'] + pa[5]:5.2f} + {pa[5]:5.2f}",
                                # dp_want_val('UPRT', 'PRT temp'), dp_want_val('ZINST48', 'PRT pressure'),
                                # dp_want_val('ZINST36', 'Let-down flow'), dp_want_val('BFV122', 'Charging Valve pos'),
                                # dp_want_val('BPV145', 'Let-down Valve pos'),
                            )

                        # ==================================================================================================
                        # Train

                        gamma = 0.98
                        lmbda = 0.95

                        # 1 .. 10
                        spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                        scomp_batch = torch.tensor(scomp_lst,
                                                   dtype=torch.float)
                        # 2 .. 10 + (1 Last value)
                        spy_lst.append(self.S_Py.tolist()[0])
                        scomp_lst.append(self.S_Comp.tolist()[0])
                        spy_fin = torch.tensor(spy_lst[1:], dtype=torch.float)
                        scomp_fin = torch.tensor(scomp_lst[1:],
                                                 dtype=torch.float)

                        # 각 네트워크 별 Advantage 계산
                        for nubNet in range(0, 6):
                            # for nubNet in range(0, self.LocalNet.NubNET):
                            # GAE
                            # r_dict[nubNet]: (5,) -> (5,1)
                            # Netout : (5,1)
                            # done_dict[nubNet]: (5,) -> (5,1)
                            td_target = torch.tensor(r_dict[nubNet], dtype=torch.float).view(t_max, 1) + \
                                        gamma * self.LocalNet.NET[nubNet].GetPredictCrticOut(spy_fin, scomp_fin) * \
                                        torch.tensor(done_dict[nubNet], dtype=torch.float).view(t_max, 1)
                            delta = td_target - self.LocalNet.NET[
                                nubNet].GetPredictCrticOut(
                                    spy_batch, scomp_batch)
                            delta = delta.detach().numpy()

                            adv_list = []
                            adv_ = 0.0
                            for reward in delta[::-1]:
                                adv_ = gamma * adv_ * lmbda + reward[0]
                                adv_list.append([adv_])
                            adv_list.reverse()
                            adv = torch.tensor(adv_list, dtype=torch.float)

                            PreVal = self.LocalNet.NET[
                                nubNet].GetPredictActorOut(
                                    spy_batch, scomp_batch)
                            PreVal = PreVal.gather(
                                1, torch.tensor(a_dict[nubNet]))  # PreVal_a
                            # TOOL.ALLP(PreVal, f"Preval {nubNet}")

                            # Ratio 계산 a/b == exp(log(a) - log(b))
                            # TOOL.ALLP(a_prob[nubNet], f"a_prob {nubNet}")
                            Preval_old_a_prob = torch.tensor(a_prob[nubNet],
                                                             dtype=torch.float)
                            ratio = torch.exp(
                                torch.log(PreVal) -
                                torch.log(Preval_old_a_prob))
                            # TOOL.ALLP(ratio, f"ratio {nubNet}")

                            # surr1, 2
                            eps_clip = 0.1
                            surr1 = ratio * adv
                            surr2 = torch.clamp(ratio, 1 - eps_clip,
                                                1 + eps_clip) * adv

                            min_val = torch.min(surr1, surr2)
                            smooth_l1_loss = nn.functional.smooth_l1_loss(
                                self.LocalNet.NET[nubNet].GetPredictCrticOut(
                                    spy_batch, scomp_batch),
                                td_target.detach())

                            loss = -min_val + smooth_l1_loss

                            self.LocalOPT.NETOPT[nubNet].zero_grad()
                            loss.mean().backward()
                            for global_param, local_param in zip(
                                    self.GlobalNet.NET[nubNet].parameters(),
                                    self.LocalNet.NET[nubNet].parameters()):
                                global_param._grad = local_param.grad
                            self.LocalOPT.NETOPT[nubNet].step()
                            self.LocalNet.NET[nubNet].load_state_dict(
                                self.GlobalNet.NET[nubNet].state_dict())

                            # TOOL.ALLP(advantage.mean())
                            # print(self.CurrentIter, 'AgentNub: ', nubNet,
                            #       'adv: ', adv.mean().item(), 'loss: ', loss.mean().item(),
                            #       '= - min_val(', min_val.mean().item(), ') + Smooth(', smooth_l1_loss.mean().item(), ')')

                        for nubNet in range(6, 8):
                            y_predict_tensor = self.LocalNet.NET[
                                nubNet].GetPredictActorOut(
                                    spy_batch, scomp_batch)
                            # TOOL.ALLP(y_predict[nubNet], "loss_y_predict")
                            # TOOL.ALLP(y_answer[nubNet], "loss_y_predict")
                            y_answer_tensor = torch.tensor(y_answer[nubNet],
                                                           dtype=torch.float)
                            # TOOL.ALLP(y_predict_tensor, "loss_y_predict")
                            # TOOL.ALLP(y_answer_tensor, "loss_y_predict_ans")

                            loss = nn.functional.mse_loss(
                                y_predict_tensor, y_answer_tensor)

                            self.LocalOPT.NETOPT[nubNet].zero_grad()
                            # loss.mean().backward()
                            loss.backward()
                            for global_param, local_param in zip(
                                    self.GlobalNet.NET[nubNet].parameters(),
                                    self.LocalNet.NET[nubNet].parameters()):
                                global_param._grad = local_param.grad
                            self.LocalOPT.NETOPT[nubNet].step()
                            self.LocalNet.NET[nubNet].load_state_dict(
                                self.GlobalNet.NET[nubNet].state_dict())

                print('DONE EP')
                break
Exemple #7
0
class Agent(mp.Process):
    """Worker process that samples through the CNS interface and trains a shared network.

    The worker keeps a local `PPOModel` copy, collects short rollouts by
    stepping the CNS interface, back-propagates through the local copy and
    transfers the resulting gradients to the global network, whose parameters
    the optimizer updates.
    """

    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        """Create the local network copy, the optimizer, the CNS link and memory handles.

        Args:
            GlobalNet: Shared network; its parameters are the ones optimized.
            MEM: Shared mapping; `run` reads and increments its 'Iter' entry.
            CNS_ip: Forwarded to `CNS`.
            CNS_port: Forwarded to `CNS`.
            Remote_ip: Forwarded to `CNS`.
            Remote_port: Forwarded to `CNS`.
        """
        mp.Process.__init__(self)
        # Network info
        self.GlobalNet = GlobalNet
        self.LocalNet = PPOModel(nub_para=2, time_leg=10)
        self.LocalNet.load_state_dict(GlobalNet.state_dict())
        # The optimizer is bound to the *global* parameters; `run` copies the
        # local gradients onto them before every optimizer step.
        self.optimizer = optim.Adam(GlobalNet.parameters(), lr=learning_rate)
        # CNS
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)
        # Shared memory
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Work info
        self.W = Work_info()

    # ==============================================================================================================
    # Part that sends control signals
    def send_action_append(self, pa, va):
        """Queue parameter names `pa` with their matching values `va`.

        `self.para` and `self.val` must already exist (see `send_action`).
        Raises IndexError if `va` is shorter than `pa`.
        """
        for idx, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[idx])

    def send_action(self, act):
        """Reset the outgoing parameter/value lists and send them to the CNS.

        `act` is accepted but not used yet: nothing is queued, so two empty
        lists are sent.
        """
        # Lists holding the variables and values to be transmitted
        self.para = []
        self.val = []

        # Send the final parameters
        self.CNS._send_control_signal(self.para, self.val)
    #
    # ==============================================================================================================
    # Build input / output values
    def InitialStateSet(self):
        """Create empty fixed-length histories for the physical and component parameters."""
        self.PhyPara = ['ZINST58', 'ZINST63']
        self.PhyState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.PhyPara}

        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.COMPPara}

    def MakeStateSet(self):
        """Append the current CNS values to the histories and rebuild the state tensors.

        Sets `S_Py` / `S_Comp` (each reshaped to 1 x n_params x n_steps) and
        `S_ONE_Py` / `S_ONE_Comp` (latest value of each parameter).
        """
        # Accumulate the latest pre-processed values
        for key in self.PhyPara:
            self.PhyState[key].append(self.PreProcessing(key, self.CNS.mem[key]['Val']))
        for key in self.COMPPara:
            self.COMPState[key].append(self.PreProcessing(key, self.CNS.mem[key]['Val']))

        # Convert to tensors with a leading batch dimension of 1
        self.S_Py = torch.tensor([self.PhyState[key] for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0], self.S_Py.shape[1])
        self.S_Comp = torch.tensor([self.COMPState[key] for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0], self.S_Comp.shape[1])

        # Most recent single value per parameter
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        """Scale the raw value of `para`; parameters without a rule pass through unchanged."""
        if para == 'ZINST58':
            return round(val / 1000, 7)  # pressurizer pressure
        if para == 'ZINST63':
            return round(val / 100, 7)  # pressurizer level
        return val

    # ==============================================================================================================

    def run(self):
        """Process entry point: initialise the CNS, roll out, and train, forever.

        Per episode: fill the history with `W.TimeLeg` steps, then run 15
        updates of 5 sampled steps each. The reward is +0.1 only when both
        predicted next values (last value + network output) land within
        +/-0.0001 of the values observed after the step, otherwise -0.1.
        """
        while True:
            self.CNS.init_cns(initial_nub=1)
            time.sleep(1)

            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial
            done = False
            self.InitialStateSet()

            while not done:
                # Warm-up: fill the history windows before sampling.
                for t in range(self.W.TimeLeg):
                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                for __ in range(15):
                    spy_lst, scomp_lst, r_lst = [], [], []
                    # Sampling
                    for t in range(5):
                        PreVal = self.LocalNet.GetPredictActorOut(x_py=self.S_Py, x_comp=self.S_Comp)
                        PreVal = PreVal.tolist()[0]  # drop the batch dimension

                        spy_lst.append(self.S_Py.tolist()[0])
                        scomp_lst.append(self.S_Comp.tolist()[0])

                        # Predicted next value = last observed value + network output.
                        old_before = {
                            nub_val: self.S_ONE_Py[nub_val] + PreVal[nub_val]
                            for nub_val in range(0, 2)
                        }

                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()

                        r = {0: 0, 1: 0}

                        for nub_val in range(0, 2):
                            if self.S_ONE_Py[nub_val] - 0.0001 < old_before[nub_val] < self.S_ONE_Py[nub_val] + 0.0001:
                                r[nub_val] = 0.1
                            else:
                                r[nub_val] = -0.1
                        if r[0] == 0.1 and r[1] == 0.1:
                            t_r = 0.1
                        else:
                            t_r = -0.1

                        r_lst.append(t_r)
                        print(self.CurrentIter, PreVal, self.S_ONE_Py[0] - 0.0001, old_before[0], self.S_ONE_Py[0], self.S_ONE_Py[0] + 0.0001, '|',
                              self.S_ONE_Py[1] - 0.0001, old_before[1], self.S_ONE_Py[1], self.S_ONE_Py[1] + 0.0001, '|', r[0], r[1], t_r)

                    # Train
                    # Bootstrap the return from the critic's estimate of the last state.
                    spy_fin = self.S_Py
                    scomp_fin = self.S_Comp

                    R = 0.0 if done else self.LocalNet.GetPredictCrticOut(spy_fin, scomp_fin).item()

                    # NOTE(review): `gamma` is not defined in this class; presumably a
                    # module-level constant like `learning_rate` - confirm.
                    td_target_lst = []
                    for reward in r_lst[::-1]:
                        R = gamma * R + reward
                        td_target_lst.append([R])
                    td_target_lst.reverse()

                    # Build batches
                    spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                    scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)
                    td_target = torch.tensor(td_target_lst)

                    value = self.LocalNet.GetPredictCrticOut(spy_batch, scomp_batch)
                    advantage = td_target - value

                    PreVal = self.LocalNet.GetPredictActorOut(x_py=spy_batch, x_comp=scomp_batch)

                    loss = -torch.log(PreVal) * advantage.detach() + \
                           nn.functional.smooth_l1_loss(self.LocalNet.GetPredictCrticOut(spy_batch, scomp_batch),
                                                        td_target.detach())

                    self.optimizer.zero_grad()
                    loss.mean().backward()
                    # Hand the locally computed gradients to the global parameters.
                    for global_param, local_param in zip(self.GlobalNet.parameters(),
                                                         self.LocalNet.parameters()):
                        global_param._grad = local_param.grad
                    self.optimizer.step()
                    # Re-sync the local copy with the freshly updated global weights.
                    self.LocalNet.load_state_dict(self.GlobalNet.state_dict())

                break
            print('Done')
Exemple #8
0
class Agent(mp.Process):
    """Worker process that trains one actor-critic network per entry of a `NETBOX`.

    Each worker owns a local `NETBOX`, samples one discrete action per network
    at every step, rewards each network for flagging (action 1) the injected
    malfunction at the right time, and pushes its gradients to the matching
    global network.
    """

    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip,
                 Remote_port):
        """Create the local networks, optimizers, CNS link and memory handles.

        Args:
            GlobalNet: Shared network box; `GlobalNet.NET[i]` is optimized.
            MEM: Shared mapping; `run` reads and increments its 'Iter' entry.
            CNS_ip: Forwarded to `CNS`.
            CNS_port: Forwarded to `CNS`.
            Remote_ip: Forwarded to `CNS`.
            Remote_port: Forwarded to `CNS`.
        """
        mp.Process.__init__(self)
        # Network info
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        # Start every local network from the current global weights.
        for _ in range(0, self.LocalNet.NubNET):
            self.LocalNet.NET[_].load_state_dict(
                self.GlobalNet.NET[_].state_dict())
        # Optimizers are built over the *global* networks.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET,
                                  NET=self.GlobalNet.NET)
        # CNS
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)
        # Shared memory
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Work info
        self.W = Work_info()
        print(f'Make -- {self}')

    # ==============================================================================================================
    # Part that sends control signals
    def send_action_append(self, pa, va):
        """Queue parameter names `pa` with their matching values `va`.

        `self.para` and `self.val` must already exist (see `send_action`).
        Raises IndexError if `va` is shorter than `pa`.
        """
        for idx, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[idx])

    def send_action(self, act):
        """Reset the outgoing parameter/value lists and send them to the CNS.

        `act` is accepted but not used yet: nothing is queued, so two empty
        lists are sent.
        """
        # Lists holding the variables and values to be transmitted
        self.para = []
        self.val = []

        # Send the final parameters
        self.CNS._send_control_signal(self.para, self.val)

    #
    # ==============================================================================================================
    # Build input / output values
    def InitialStateSet(self):
        """Create empty fixed-length histories for the physical and component parameters."""
        self.PhyPara = ['ZINST58', 'ZINST63']
        self.PhyState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.PhyPara}

        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {
            _: deque(maxlen=self.W.TimeLeg)
            for _ in self.COMPPara
        }

    def MakeStateSet(self):
        """Append the current CNS values to the histories and rebuild the state tensors.

        Sets `S_Py` / `S_Comp` (each reshaped to 1 x n_params x n_steps) and
        `S_ONE_Py` / `S_ONE_Comp` (latest value of each parameter).
        """
        # Accumulate the latest pre-processed values
        for key in self.PhyPara:
            self.PhyState[key].append(
                self.PreProcessing(key, self.CNS.mem[key]['Val']))
        for key in self.COMPPara:
            self.COMPState[key].append(
                self.PreProcessing(key, self.CNS.mem[key]['Val']))

        # Convert to tensors with a leading batch dimension of 1
        self.S_Py = torch.tensor([self.PhyState[key] for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0],
                                      self.S_Py.shape[1])
        self.S_Comp = torch.tensor(
            [self.COMPState[key] for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0],
                                          self.S_Comp.shape[1])

        # Most recent single value per parameter
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        """Scale the raw value of `para`; parameters without a rule pass through unchanged."""
        if para == 'ZINST58':
            return round(val / 1000, 7)  # pressurizer pressure
        if para == 'ZINST63':
            return round(val / 100, 7)  # pressurizer level
        return val

    # ==============================================================================================================

    def _step_rewards(self, actions, maltime):
        """Score each network's own action for the current step.

        Args:
            actions: Mapping of network index to the action it sampled.
            maltime: Value of 'KCNTOMS' at which the malfunction is injected.

        Returns:
            Mapping of network index to +1 or -1. Action 1 means "malfunction":
            it earns +1 once 'KCNTOMS' has reached `maltime` and -1 before;
            any other action is scored the opposite way.
        """
        before_malfunction = self.CNS.mem['KCNTOMS']['Val'] < maltime
        return {
            net_idx: (1 if (chosen == 1) != before_malfunction else -1)
            for net_idx, chosen in actions.items()
        }

    def run(self):
        """Process entry point: reset the CNS with a random malfunction and train, forever.

        Per episode: fill the history with `W.TimeLeg` steps, then run 15
        updates of 5 sampled steps each, training every network in the box.
        """
        while True:
            size, maltime = ran.randint(100, 600), ran.randint(30, 100) * 5
            self.CNS.reset(initial_nub=1,
                           mal=True,
                           mal_case=36,
                           mal_opt=size,
                           mal_time=maltime)
            print(f'DONE initial {size}, {maltime}')

            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial
            done = False
            self.InitialStateSet()

            while not done:
                # Warm-up: fill the history windows before sampling.
                for t in range(self.W.TimeLeg):
                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                for __ in range(15):
                    spy_lst, scomp_lst = [], []
                    a_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                    r_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                    # Sampling
                    for t in range(5):
                        # Action sampled by each network at this step; kept so
                        # every network is rewarded for its *own* decision.
                        step_actions = {}
                        for nubNet in range(self.LocalNet.NubNET):
                            NetOut = self.LocalNet.NET[
                                nubNet].GetPredictActorOut(x_py=self.S_Py,
                                                           x_comp=self.S_Comp)
                            NetOut = NetOut.view(-1)  # flatten to 1-D probabilities
                            # Sample an action index from the output probabilities.
                            act = torch.distributions.Categorical(
                                NetOut).sample().item()
                            NetOut = NetOut.tolist()[act]

                            step_actions[nubNet] = act
                            a_dict[nubNet].append([act])

                        spy_lst.append(self.S_Py.tolist()[0])
                        scomp_lst.append(self.S_Comp.tolist()[0])

                        # CNS + 1 step
                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()
                        # Compute and store the reward of every network
                        r = self._step_rewards(step_actions, maltime)
                        for nubNet, reward in r.items():
                            r_dict[nubNet].append(reward)

                        print(self.CurrentIter, r[0], NetOut)

                    # ==================================================================================================
                    # Train

                    gamma = 0.98
                    spy_fin = self.S_Py  # state after the last sampled step
                    scomp_fin = self.S_Comp  # state after the last sampled step
                    spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                    scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)

                    # Advantage and update for each network
                    for nubNet in range(self.LocalNet.NubNET):
                        # Bootstrap the return from the critic's last-state estimate.
                        R = 0.0 if done else self.LocalNet.NET[
                            nubNet].GetPredictCrticOut(spy_fin,
                                                       scomp_fin).item()
                        td_target_lst = []
                        for reward in r_dict[nubNet][::-1]:
                            R = gamma * R + reward
                            td_target_lst.append([R])
                        td_target_lst.reverse()

                        td_target = torch.tensor(td_target_lst)
                        value = self.LocalNet.NET[nubNet].GetPredictCrticOut(
                            spy_batch, scomp_batch)
                        advantage = td_target - value

                        PreVal = self.LocalNet.NET[nubNet].GetPredictActorOut(
                            spy_batch, scomp_batch)

                        # Probability the network assigned to the action it took.
                        Preval_a = PreVal.gather(1,
                                                 torch.tensor(a_dict[nubNet]))

                        loss = -torch.log(Preval_a) * advantage.detach() + \
                               nn.functional.smooth_l1_loss(self.LocalNet.NET[nubNet].GetPredictCrticOut(spy_batch, scomp_batch),
                                                            td_target.detach())

                        self.LocalOPT.NETOPT[nubNet].zero_grad()
                        loss.mean().backward()
                        # Hand the locally computed gradients to the global parameters.
                        for global_param, local_param in zip(
                                self.GlobalNet.NET[nubNet].parameters(),
                                self.LocalNet.NET[nubNet].parameters()):
                            global_param._grad = local_param.grad
                        self.LocalOPT.NETOPT[nubNet].step()
                        # Re-sync the local copy with the updated global weights.
                        self.LocalNet.NET[nubNet].load_state_dict(
                            self.GlobalNet.NET[nubNet].state_dict())

                        print(self.CurrentIter, 'AgentNub: ', nubNet, 'adv: ',
                              advantage.mean().item(), 'loss: ',
                              loss.mean().item())

                print('DONE EP')
                break
Exemple #9
0
class Agent(mp.Process):
    """Worker process that replays the malfunction scenarios listed in `Work_info.mal_list`.

    This variant only drives the CNS interface to collect data; it holds no
    network and performs no training.
    """

    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        """Create the CNS link and memory handles.

        Args:
            GlobalNet: Accepted for signature compatibility; not used here.
            MEM: Shared mapping; `run` reads and increments its 'Iter' entry.
            CNS_ip: Forwarded to `CNS`.
            CNS_port: Forwarded to `CNS`.
            Remote_ip: Forwarded to `CNS`.
            Remote_port: Forwarded to `CNS`.
        """
        mp.Process.__init__(self)
        # Work info
        self.W = Work_info()
        # CNS
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port, Max_len=self.W.TimeLeg)
        self.CNS.LoggerPath = 'DB'
        # Shared memory
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        print(f'Make -- {self}')

    # ==============================================================================================================
    # Part that sends control signals
    def send_action_append(self, pa, va):
        """Queue parameter names `pa` with their matching values `va`.

        `self.para` and `self.val` must already exist (see `send_action`).
        Raises IndexError if `va` is shorter than `pa`.
        """
        for idx, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[idx])

    def send_action(self, act=0):
        """Reset the outgoing parameter/value lists and send them to the CNS.

        `act` is accepted but not used yet: nothing is queued, so two empty
        lists are sent.
        """
        # Lists holding the variables and values to be transmitted
        self.para = []
        self.val = []
        # Send the final parameters
        self.CNS._send_control_signal(self.para, self.val)
    #
    # ==============================================================================================================
    # Build input / output values

    def PreProcessing(self):
        """Placeholder hook called after each CNS step; currently does nothing."""
        pass

    def CNSStep(self):
        """Advance the CNS by one step, post-process, and record the values.

        The per-call notes below are translated from the original author's
        comments; the CNS internals are not visible from this class.
        """
        self.CNS.run_freeze_CNS()   # update memory with the values acquired from the CNS
        self.PreProcessing()        # re-process values for the variable names listed in db_add.txt
        self.CNS._append_val_to_list()  # store the final ['Val'] into ['List']

    def run(self):
        """Process entry point: replay scenarios until the schedule is exhausted.

        Each pass claims the next iteration index from shared memory, looks up
        its scenario in `W.mal_list`, resets the CNS with the first malfunction
        of that scenario and collects `W.TimeLeg + 1` steps. The loop ends when
        no scenario exists for the claimed index, or when an episode fails (the
        error is reported instead of being silently discarded).
        """
        while True:
            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1

            # Scenario layout: {'Case': .., 'Opt': .., 'Time': .., 'Case2': .., 'Opt2': .., 'Time2': ..}
            try:
                scenario = self.W.mal_list[self.CurrentIter]
                size = scenario['Opt']
                maltime = scenario['Time']
                mal_case = scenario['Case']

                mal_case2 = scenario['Case2']
                mal_opt2 = scenario['Opt2']
                mal_time2 = scenario['Time2']
            except LookupError:
                # No (complete) scenario for this iteration: the schedule is done.
                break

            file_name = f'{mal_case}_{size}_{maltime}_{mal_case2}_{mal_opt2}_{mal_time2}'

            try:
                # CNS initial; only the first malfunction is injected here, the
                # second one is used for the log file name only.
                self.CNS.reset(initial_nub=1, mal=True, mal_case=mal_case, mal_opt=size, mal_time=maltime,
                               file_name=file_name)
                time.sleep(1)

                print(f'DONE initial {file_name}')

                # Data collection only, for TimeLeg + 1 steps
                for t in range(self.W.TimeLeg + 1):
                    self.CNSStep()

                print('DONE EP')
            except Exception as exc:
                # Keep the original "stop on failure" behaviour, but report the
                # cause and let SystemExit / KeyboardInterrupt propagate.
                print(f'STOP {file_name}: {exc!r}')
                break
        print('END')
Exemple #10
0
class Agent(mp.Process):
    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip,
                 Remote_port):
        """Create one worker process bound to a CNS instance.

        Args:
            GlobalNet: Shared network container; its ``NET[i]`` weights are
                copied into a freshly built local ``NETBOX``.
            MEM: Shared memory mapping; ``MEM['Iter']`` is read here and a
                deep copy of the whole mapping is kept in ``self.LocalMem``.
            CNS_ip, CNS_port, Remote_ip, Remote_port: Connection settings
                forwarded unchanged to the ``CNS`` constructor.
        """
        mp.Process.__init__(self)
        # Network info
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        # Copy the parent (global) network weights into the child (local) networks.
        for net_id in range(0, self.LocalNet.NubNET):
            self.LocalNet.NET[net_id].load_state_dict(
                self.GlobalNet.NET[net_id].state_dict())
        # Create the optimizers; note they are built over the GLOBAL networks.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET,
                                  NET=self.GlobalNet.NET)

        # Work info
        self.W = Work_info()
        # RLMem info
        self.RLMem = RLMem(net_nub=self.LocalNet.NubNET)

        # CNS
        self.CNS = CNS(self.name,
                       CNS_ip,
                       CNS_port,
                       Remote_ip,
                       Remote_port,
                       Max_len=self.W.TimeLeg)
        self.CNS.LoggerPath = 'V6_1_EOP'
        # SharedMem
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)

        # Parameters used by this agent.
        # name: {'Div': divisor applied to the raw value,
        #        'Round': number of decimals kept,
        #        'Type': where the value is used -- PreProcessing puts 'P'
        #                entries into S_Py, 'F' entries into S_Comp and skips
        #                'C' (control) entries.}
        self.PARA_info = {
            'ZINST58': {
                'Div': 1000,
                'Round': 5,
                'Type': 'P'
            },
            'ZINST63': {
                'Div': 100,
                'Round': 4,
                'Type': 'P'
            },
            'ZVCT': {
                'Div': 100,
                'Round': 4,
                'Type': 'P'
            },
            'BFV122': {
                'Div': 1,
                'Round': 2,
                'Type': 'F'
            },
            'BPV145': {
                'Div': 1,
                'Round': 2,
                'Type': 'F'
            },
            'BPV122C': {
                'Div': 2,
                # Was misspelled 'RoCNSnd'; every other entry (and
                # PreProcessing) uses the key 'Round'.
                'Round': 2,
                'Type': 'C'
            },
            'BPV145C': {
                'Div': 2,
                'Round': 2,
                'Type': 'C'
            },
        }
        ## Check that the parameters in use are declared in db_add.txt
        if self.mem['Iter'] == 0:
            # Report every parameter whose 'v'-prefixed entry is missing
            # from the CNS memory.
            for name in self.PARA_info:
                if f'v{name}' not in self.CNS.mem.keys():
                    print(f'v{name} 값이 없음 db_add.txt에 추가할 것')
            # Conversely, report 'v'-prefixed CNS entries that are not
            # listed in PARA_info.
            for key in self.CNS.mem.keys():
                if key.startswith('v'):  # first character is 'v'
                    if key[1:] not in self.PARA_info:
                        print(f'{key} 값이 없음 self.PARA_info에 추가할 것')
        ## -----------------------------------------------

        # GP Setting
        # NOTE(review): the figure/axes dictionaries below are disabled, but
        # run() still references self.ax_dict / self.fig_dict in its
        # prognostic branch.
        # self.fig_dict = {i_: plt.figure(figsize=(13, 13)) for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145", "BFV122_CONT", "BPV145_CONT"]}
        # self.ax_dict = {i_: self.fig_dict[i_].add_subplot() for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145", "BFV122_CONT", "BPV145_CONT"]}
        print(f'Make -- {self}')

    # ==============================================================================================================
    # 제어 신호 보내는 파트
    def send_action_append(self, pa, va):
        for _ in range(len(pa)):
            self.para.append(pa[_])
            self.val.append(va[_])

    def send_action(self, act=0, BFV122=0, PV145=0):
        # 전송될 변수와 값 저장하는 리스트
        self.para = []
        self.val = []

        if act == 0:
            self.send_action_append(["KSWO100", "KSWO89"],
                                    [1, 1])  # BFV122 Man,  PV145 Man

        if PV145 == 0:
            self.send_action_append(["KSWO90", "KSWO91"], [0, 0])  # PV145 Stay
        elif PV145 == 1:
            self.send_action_append(["KSWO90", "KSWO91"], [0, 1])  # PV145 Up
        elif PV145 == 2:
            self.send_action_append(["KSWO90", "KSWO91"], [1, 0])  # PV145 Down

        if BFV122 == 0:
            self.send_action_append(["KSWO101", "KSWO102"],
                                    [0, 0])  # BFV122 Stay
        elif BFV122 == 1:
            self.send_action_append(["KSWO101", "KSWO102"],
                                    [0, 1])  # BFV122 Up
        elif BFV122 == 2:
            self.send_action_append(["KSWO101", "KSWO102"],
                                    [1, 0])  # BFV122 Down

        # 최종 파라메터 전송
        self.CNS._send_control_signal(self.para, self.val)

    #
    # ==============================================================================================================
    # 입력 출력 값 생성

    def PreProcessing(self):
        """Re-scale CNS values and rebuild the network input tensors.

        For every non-control entry of ``self.PARA_info`` the raw
        ``self.CNS.mem[name]['Val']`` is passed through ``TOOL.RoundVal`` with
        that entry's 'Div' and 'Round' settings and stored in
        ``self.CNS.mem['v' + name]['Val']``.  Afterwards ``self.S_Py`` is built
        from the 'List' histories of the 'P' entries and ``self.S_Comp`` from
        those of the 'F' entries, each reshaped to ``(1, rows, cols)``.
        """
        specs = self.PARA_info

        # Re-process the network input values (control variables excluded).
        for name in specs:
            spec = specs[name]
            if spec['Type'] == 'C':
                continue
            self.CNS.mem[f'v{name}']['Val'] = TOOL.RoundVal(
                self.CNS.mem[name]['Val'], spec['Div'], spec['Round'])

        # Gather the histories used as network input, grouped by type.
        phys_rows = []
        comp_rows = []
        for name in specs:
            kind = specs[f'{name}']['Type']
            if kind == 'P':
                phys_rows.append(self.CNS.mem[f'{name}']['List'])
            if kind == 'F':
                comp_rows.append(self.CNS.mem[f'{name}']['List'])

        # Convert to tensors and add a leading dimension of size 1.
        self.S_Py = torch.tensor(phys_rows)
        py_rows, py_cols = self.S_Py.shape[0], self.S_Py.shape[1]
        self.S_Py = self.S_Py.reshape(1, py_rows, py_cols)

        self.S_Comp = torch.tensor(comp_rows)
        comp_n, comp_len = self.S_Comp.shape[0], self.S_Comp.shape[1]
        self.S_Comp = self.S_Comp.reshape(1, comp_n, comp_len)

    def CNSStep(self):
        self.CNS.run_freeze_CNS()  # CNS에 취득한 값을 메모리에 업데이트
        self.PreProcessing()  # 취득된 값에 기반하여 db_add.txt의 변수명에 해당하는 값을 재처리 및 업데이트
        self.CNS._append_val_to_list()  # 최종 값['Val']를 ['List']에 저장

    def run(self):
        """Process entry point: run CNS episodes forever.

        Each pass of the outer loop claims an iteration number from the shared
        ``self.mem['Iter']`` counter, resets CNS with malfunction case 36 using
        a random option/time, and then executes one of two branches:

        * Prognostic (test) mode, on every 100th iteration except 0: rolls the
          local networks forward on copies of the state tensors and plots the
          recorded trajectories.
        * Train mode, otherwise: collects ``TimeLeg + 1`` steps, then for
          ``fulltime`` rounds samples ``t_max`` steps, stores actions, states,
          rewards and done flags in ``self.RLMem`` and applies one
          clipped-ratio surrogate update per network (PPO-style), writing the
          local gradients onto the global networks and reloading the local
          weights from them.

        NOTE(review): the prognostic branch uses ``ProgRecodBox``, ``Timer``,
        ``self.MakeStateSet``, ``self.Recode``, ``self.ax_dict`` and
        ``self.fig_dict``. In the visible code the first two are only
        assigned in commented-out lines and the rest are not defined by this
        class, so that branch looks unable to run as written -- confirm.
        """
        while True:
            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            # Mal function initial
            size, maltime = ran.randint(100, 600), ran.randint(30, 100) * 5
            # CNS initial
            self.CNS.reset(initial_nub=1,
                           mal=True,
                           mal_case=36,
                           mal_opt=size,
                           mal_time=maltime,
                           file_name=self.CurrentIter)
            print(f'DONE initial {size}, {maltime}')

            # Diagnosis-module tester: every 100th iteration (except 0) runs in test mode.
            if self.CurrentIter != 0 and self.CurrentIter % 100 == 0:
                print(self.CurrentIter, 'Yes Test')
                self.PrognosticMode = True
            else:
                print(self.CurrentIter, 'No Test')
                self.PrognosticMode = False

            # Initial
            done = False

            # GP: clear the previous data
            # [self.ax_dict[i_].clear() for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145", "BFV122_CONT", "BPV145_CONT"]]

            # NOTE(review): the body below ends with an unconditional `break`,
            # so this loop executes at most one pass per episode.
            while not done:
                fulltime = 2
                t_max = 2  # total iteration = fulltime * t_max
                ep_iter = 0
                # NOTE(review): tun and ro are not read anywhere in this method.
                tun = [1000, 100, 100, 1, 1]
                ro = [5, 4, 4, 2, 2]

                # ProgRecodBox = {"Time": [], "ZINST58": [], "ZINST63": [], "ZVCT": [], "BFV122": [], "BPV145": [], "BFV122_CONT": [], "BPV145_CONT": []}   # reset the record
                # Timer = 0

                if self.PrognosticMode:  # TODO: work still needed... 0817
                    for i in range(0, 2):
                        if i == 0:  # Automode
                            # Send the initial control settings
                            self.send_action()
                            time.sleep(1)

                        # Test Mode
                        for save_time_leg in range(self.W.TimeLeg):
                            self.CNS.run_freeze_CNS()
                            self.MakeStateSet()
                            # NOTE(review): ProgRecodBox and Timer are read here
                            # before any assignment (their initialisation above
                            # is commented out).
                            Timer, ProgRecodBox = self.Recode(
                                ProgRecodBox,
                                Timer,
                                S_Py=self.S_Py,
                                S_Comp=self.S_Comp)

                        for t in range(fulltime * t_max):  # total iteration
                            if t == 0 or t % 10 == 0:  # predict at step 0 and every 10 steps
                                copySPy, copySComp = copy.deepcopy(
                                    self.S_Py), copy.deepcopy(
                                        self.S_Comp)  # copy the contents only
                                copyRecodBox = copy.deepcopy(ProgRecodBox)
                                Temp_Timer = copy.deepcopy(Timer)
                                for PredictTime in range(
                                        t, fulltime *
                                        t_max):  # the prediction horizon shrinks as time advances
                                    save_ragular_para = {
                                        _: 0
                                        for _ in range(self.LocalNet.NubNET)
                                    }
                                    # Produce the predicted values
                                    for nubNet in range(
                                            0, self.LocalNet.NubNET):
                                        NetOut = self.LocalNet.NET[
                                            nubNet].GetPredictActorOut(
                                                x_py=copySPy, x_comp=copySComp)
                                        NetOut = NetOut.view(
                                            -1)  # (1, 2) -> (2, )
                                        act_ = NetOut.argmax().item(
                                        )  # index of the largest output, as a Python number
                                        if nubNet in [0, 6, 7]:
                                            save_ragular_para[nubNet] = act_
                                        elif nubNet in [1]:
                                            save_ragular_para[nubNet] = round(
                                                (act_ - 100) / 100000, 5)
                                        elif nubNet in [2, 3]:
                                            save_ragular_para[nubNet] = round(
                                                (act_ - 100) / 10000, 4)
                                        elif nubNet in [4, 5]:
                                            save_ragular_para[nubNet] = round(
                                                (act_ - 100) / 100, 2)
                                    # Store the predicted values and finish

                                    #
                                    copySPyLastVal = copySPy[:, :,
                                                             -1:]  # [1, 3, 10] -> [1, 3, 1]: take the last value.
                                    # NOTE(review): `tensor` is used as a bare name; confirm it is imported at file level.
                                    add_val = tensor([[[save_ragular_para[1]],
                                                       [save_ragular_para[2]],
                                                       [save_ragular_para[3]]]
                                                      ])
                                    copySPyLastVal = copySPyLastVal + add_val  # add the predicted delta to the last value.
                                    copySPy = torch.cat(
                                        (copySPy, copySPyLastVal),
                                        dim=2)  # append the new value to the tensor.
                                    # copySPy = torch.tensor(copySPy)
                                    copySPy = copySPy[:, :, 1:]  # trim one entry (the slice drops index 0 along dim 2).

                                    copySCompLastVal = copySComp[:, :,
                                                                 -1:]  # [1, 3, 10] -> [1, 3, 1]: take the last value.
                                    # The slice above is immediately overwritten by the predicted values.
                                    copySCompLastVal = tensor([[
                                        [save_ragular_para[4]],
                                        [save_ragular_para[5]],
                                        [save_ragular_para[6] / 2],
                                        [save_ragular_para[7] / 2],
                                    ]])
                                    copySComp = torch.cat(
                                        (copySComp, copySCompLastVal),
                                        dim=2)  # append the new value to the tensor.
                                    # copySComp = torch.tensor(copySComp)
                                    copySComp = copySComp[:, :,
                                                          1:]  # trim one entry (the slice drops index 0 along dim 2).

                                    # Recode
                                    Temp_Timer, copyRecodBox = self.Recode(
                                        copyRecodBox,
                                        Temp_Timer,
                                        S_Py=copySPy,
                                        S_Comp=copySComp)

                                # Prediction finished: plot the recorded results
                                [
                                    self.ax_dict[i_].plot(copyRecodBox["Time"],
                                                          copyRecodBox[i_],
                                                          label=f"{i_}_{t}")
                                    for i_ in [
                                        "ZINST58", "ZINST63", "ZVCT", "BFV122",
                                        "BPV145", "BFV122_CONT", "BPV145_CONT"
                                    ]
                                ]
                                [
                                    self.fig_dict[i_].savefig(
                                        f"{i_}_{self.CurrentIter}_{t}.png")
                                    for i_ in [
                                        "ZINST58", "ZINST63", "ZVCT", "BFV122",
                                        "BPV145", "BFV122_CONT", "BPV145_CONT"
                                    ]
                                ]

                            a_now = {_: 0 for _ in range(self.LocalNet.NubNET)}
                            for nubNet in range(0, self.LocalNet.NubNET):
                                # TOOL.ALLP(self.S_Py, 'S_Py')
                                # TOOL.ALLP(self.S_Comp, 'S_Comp')
                                NetOut = self.LocalNet.NET[
                                    nubNet].GetPredictActorOut(
                                        x_py=self.S_Py, x_comp=self.S_Comp)
                                NetOut = NetOut.view(-1)  # (1, 2) -> (2, )
                                # TOOL.ALLP(NetOut, 'Netout before Categorical')
                                act = torch.distributions.Categorical(
                                    NetOut).sample().item(
                                    )  # sample one category and return its index as int

                                if nubNet in [0, 6, 7]:
                                    a_now[nubNet] = act
                                elif nubNet in [1]:
                                    a_now[nubNet] = round((act - 100) / 100000,
                                                          5)
                                elif nubNet in [2, 3]:
                                    a_now[nubNet] = round((act - 100) / 10000,
                                                          4)
                                elif nubNet in [4, 5]:
                                    a_now[nubNet] = round((act - 100) / 100, 2)
                            # Send Act to CNS!
                            self.send_action(act=0,
                                             BFV122=a_now[6],
                                             PV145=a_now[7])

                            # CNS + 1 Step
                            self.CNS.run_freeze_CNS()
                            self.MakeStateSet(BFV122=a_now[6], PV145=a_now[7])
                            # Recode
                            Timer, ProgRecodBox = self.Recode(
                                ProgRecodBox,
                                Timer,
                                S_Py=self.S_Py,
                                S_Comp=self.S_Comp)

                        # END Test Mode CODE
                        [
                            self.ax_dict[i_].grid() for i_ in [
                                "ZINST58", "ZINST63", "ZVCT", "BFV122",
                                "BPV145", "BFV122_CONT", "BPV145_CONT"
                            ]
                        ]
                        [
                            self.ax_dict[i_].legend() for i_ in [
                                "ZINST58", "ZINST63", "ZVCT", "BFV122",
                                "BPV145", "BFV122_CONT", "BPV145_CONT"
                            ]
                        ]
                        # First pass saves the "_M" figures, second pass the "_A" figures.
                        if i == 0:
                            [
                                self.fig_dict[i_].savefig(
                                    f"{i_}_{self.CurrentIter}_M.png")
                                for i_ in [
                                    "ZINST58", "ZINST63", "ZVCT", "BFV122",
                                    "BPV145", "BFV122_CONT", "BPV145_CONT"
                                ]
                            ]
                        else:
                            [
                                self.fig_dict[i_].savefig(
                                    f"{i_}_{self.CurrentIter}_A.png")
                                for i_ in [
                                    "ZINST58", "ZINST63", "ZVCT", "BFV122",
                                    "BPV145", "BFV122_CONT", "BPV145_CONT"
                                ]
                            ]
                        print('END TEST')
                else:
                    # Train Mode

                    # Send the initial control settings
                    self.send_action()
                    time.sleep(1)

                    # Only collect data for TimeLeg + 1 steps
                    for t in range(self.W.TimeLeg + 1):
                        self.CNSStep()

                    # Actual training starts here
                    for __ in range(fulltime):
                        # spy_lst, scomp_lst, a_lst, r_lst = [], [], [], []
                        # a_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                        # a_now = {_: 0 for _ in range(self.LocalNet.NubNET)}
                        # a_now_orgin = {_: 0 for _ in range(self.LocalNet.NubNET)}
                        # a_prob = {_: [] for _ in range(self.LocalNet.NubNET)}
                        #
                        # r_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                        # done_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                        #
                        # y_predict = {_: [] for _ in range(self.LocalNet.NubNET)}
                        # y_answer = {_: [] for _ in range(self.LocalNet.NubNET)}
                        self.RLMem.CleanTrainMem()
                        # Sampling
                        for t in range(t_max):
                            # NOTE(review): NetOut_dict is built but never read afterwards.
                            NetOut_dict = {
                                _: 0
                                for _ in range(self.LocalNet.NubNET)
                            }
                            for nubNet in range(0, self.LocalNet.NubNET):
                                # TOOL.ALLP(self.S_Py, 'S_Py')
                                # TOOL.ALLP(self.S_Comp, 'S_Comp')

                                # Get the actor network output for the current inputs.
                                NetOut = self.LocalNet.NET[
                                    nubNet].GetPredictActorOut(
                                        x_py=self.S_Py, x_comp=self.S_Comp)
                                NetOut = NetOut.view(-1)  # (1, 2) -> (2, )
                                # TOOL.ALLP(NetOut, 'Netout before Categorical')

                                # Compute act; act is an int here.
                                act = torch.distributions.Categorical(
                                    NetOut).sample().item(
                                    )  # sample one category and return its index as int
                                # TOOL.ALLP(act, 'act')

                                # Pick the network output value at the sampled index
                                NetOut = NetOut.tolist()[act]
                                # TOOL.ALLP(NetOut, f'NetOut{nubNet}')

                                # Store act and its probability value
                                self.RLMem.SaveNetOut(nubNet, NetOut, act)

                                # NetOut_dict[nubNet] = NetOut
                                # TOOL.ALLP(NetOut_dict, f'NetOut{nubNet}')

                                # Map the sampled index to the value used for this network.
                                modify_act = 0
                                if nubNet in [0, 6, 7]:
                                    modify_act = act
                                elif nubNet in [1]:
                                    modify_act = round((act - 100) / 100000, 5)
                                elif nubNet in [2, 3]:
                                    modify_act = round((act - 100) / 10000, 4)
                                elif nubNet in [4, 5]:
                                    modify_act = round((act - 100) / 100, 2)

                                # Store the modified act <- mainly used for the actual CNS control variables
                                self.RLMem.SaveModNetOut(nubNet, modify_act)

                                # a_now_orgin[nubNet] = act
                                # a_dict[nubNet].append([act])        # for training
                                # a_prob[nubNet].append([NetOut])     # for training

                            # Store the state for training
                            self.RLMem.SaveState(self.S_Py, self.S_Comp)
                            # spy_lst.append(self.S_Py.tolist()[0])  # (1, 3, 15) -list> (3, 15)
                            # scomp_lst.append(self.S_Comp.tolist()[0])  # (1, 3, 15) -list> (3, 15)

                            # old val to compare the new val
                            self.old_phys = self.S_Py[:, :, -1:].data.reshape(
                                3).tolist()  # (3,)
                            self.old_comp = self.S_Comp[:, :,
                                                        -1:].data.reshape(
                                                            2).tolist()  # (3,)
                            self.old_cns = [  # "ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"
                                round(self.old_phys[0], 5),
                                round(self.old_phys[1], 4),
                                round(self.old_phys[2], 4),
                                round(self.old_comp[0], 2),
                                round(self.old_comp[1], 2)
                            ]
                            # TOOL.ALLP(self.old_cns, "old_CNS")

                            # Send Act to CNS!
                            self.send_action(act=0,
                                             BFV122=self.RLMem.GetAct(6),
                                             PV145=self.RLMem.GetAct(7))

                            # CNS + 1 Step
                            self.CNS.run_freeze_CNS()
                            # self.MakeStateSet(BFV122=a_now[6], PV145=a_now[7])

                            # NOTE(review): S_Py / S_Comp are not rebuilt in this
                            # method after the step above (PreProcessing is not
                            # called here), so the "new" values below are read
                            # from the same tensors as the "old" ones unless
                            # run_freeze_CNS updates them -- TODO confirm.
                            self.new_phys = self.S_Py[:, :, -1:].data.reshape(
                                3).tolist()  # (3,)
                            self.new_comp = self.S_Comp[:, :,
                                                        -1:].data.reshape(
                                                            2).tolist()  # (3,)
                            self.new_cns = [  # "ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"
                                round(self.new_phys[0], 5),
                                round(self.new_phys[1], 4),
                                round(self.new_phys[2], 4),
                                round(self.new_comp[0], 2),
                                round(self.new_comp[1], 2)
                            ]

                            # Recode
                            # Timer, ProgRecodBox = self.Recode(ProgRecodBox, Timer, S_Py=self.S_Py, S_Comp=self.S_Comp)

                            # Compute rewards and the termination condition
                            # r = {_: 0 for _ in range(0, self.LocalNet.NubNET)}
                            for nubNet in range(
                                    0,
                                    self.LocalNet.NubNET):  # compute and store the reward per network
                                if nubNet in [0]:
                                    # Network 0: +1 when its action (1 = malfunction)
                                    # agrees with whether KCNTOMS has reached maltime.
                                    if self.CNS.mem['KCNTOMS']['Val'] < maltime:
                                        if self.RLMem.int_mod_action[
                                                nubNet] == 1:  # Malfunction
                                            self.RLMem.SaveReward(nubNet, -1)
                                        else:
                                            self.RLMem.SaveReward(nubNet, 1)
                                    else:
                                        if self.RLMem.int_mod_action[
                                                nubNet] == 1:  # Malfunction
                                            self.RLMem.SaveReward(nubNet, 1)
                                        else:
                                            self.RLMem.SaveReward(nubNet, -1)
                                elif nubNet in [1, 2, 3]:
                                    # Networks 1-3: +1 when old value + action matches
                                    # the new value within the per-network bound.
                                    Dealta = self.new_cns[nubNet - 1] - (
                                        self.old_cns[nubNet - 1] +
                                        self.RLMem.int_mod_action[nubNet])
                                    bound = {1: 0.00001, 2: 0.0001, 3: 0.0001}
                                    if Dealta < -bound[nubNet]:
                                        self.RLMem.SaveReward(nubNet, -1)
                                        # r[nubNet] = - ((self.old_cns[nubNet - 1] + self.RLMem.int_mod_action[nubNet]) - self.new_cns[nubNet-1])
                                    elif Dealta > bound[nubNet]:
                                        self.RLMem.SaveReward(nubNet, -1)
                                        # r[nubNet] = - (- (self.old_cns[nubNet - 1] + self.RLMem.int_mod_action[nubNet]) + self.new_cns[nubNet - 1])
                                    else:
                                        self.RLMem.SaveReward(nubNet, 1)
                                    # TOOL.ALLP(Dealta, f"Dealta")
                                    # TOOL.ALLP(r[nubNet], f"{nubNet} R nubnet")
                                    # if r[nubNet] == 1:
                                    #     pass
                                    # else:
                                    #     if nubNet in [1]:
                                    #         r[nubNet] = round(round(r[nubNet], 5) * 1000, 2)  # 0.000__ => 0.__
                                    #     elif nubNet in [2, 3]:
                                    #         r[nubNet] = round(round(r[nubNet], 4) * 100, 2)  # 0.00__ => 0.__

                                    # TOOL.ALLP(r[nubNet], f"{nubNet} R nubnet round")
                                    # print(self.new_cns[nubNet-1], self.old_cns[nubNet-1], self.RLMem.int_mod_action[nubNet])
                                elif nubNet in [4, 5]:
                                    # Networks 4-5: +1 when the action matches the new
                                    # value within 0.01.
                                    Dealta = self.new_cns[
                                        nubNet -
                                        1] - self.RLMem.int_mod_action[nubNet]
                                    if Dealta < -0.01:
                                        # r[nubNet] = - ((self.RLMem.int_mod_action[nubNet]) - self.new_cns[nubNet - 1])
                                        self.RLMem.SaveReward(nubNet, -1)
                                    elif Dealta > 0.01:
                                        # r[nubNet] = - (- (self.RLMem.int_mod_action[nubNet]) + self.new_cns[nubNet - 1])
                                        self.RLMem.SaveReward(nubNet, -1)
                                    else:
                                        self.RLMem.SaveReward(nubNet, 1)
                                    # TOOL.ALLP(Dealta, f"Dealta")
                                    # TOOL.ALLP(r[nubNet], f"{nubNet} R nubnet")
                                    # r[nubNet] = round(r[nubNet], 3)
                                    # TOOL.ALLP(r[nubNet], f"{nubNet} R nubnet round")
                                    # print(self.new_cns[nubNet - 1], self.old_cns[nubNet - 1], self.RLMem.int_mod_action[nubNet])
                                elif nubNet in [6, 7]:
                                    # Networks 6-7: reward is based on how far
                                    # new_cns[1] is from 0.55.
                                    Dealta = self.new_cns[
                                        1] - 0.55  # normal PZR level # 0.30 - 0.55 = - 0.25 # 0.56 - 0.55 = 0.01
                                    if Dealta < -0.005:  # 0.53 - 0.55 = - 0.02
                                        self.RLMem.SaveReward(
                                            nubNet, (self.new_cns[1] - 0.55) *
                                            10)  # # 0.53 - 0.55 = - 0.02
                                    elif Dealta > 0.005:  # 0.57 - 0.55 = 0.02
                                        self.RLMem.SaveReward(
                                            nubNet, (0.55 - self.new_cns[1]) *
                                            10)  # 0.55 - 0.57 = - 0.02
                                    else:
                                        self.RLMem.SaveReward(nubNet, 1)

                                # r_dict[nubNet].append(r[nubNet])

                                # Compute the termination condition
                                # NOTE(review): `__` iterates range(fulltime) with
                                # fulltime = 2, so it never equals 14 and done
                                # stays False.
                                if __ == 14 and t == t_max - 1:
                                    done = True
                                self.RLMem.SaveDone(nubNet, done)

                            # NOTE(review): dp_want_val is defined but not called in this method.
                            def dp_want_val(val, name):
                                return f"{name}: {self.CNS.mem[val]['Val']:4.4f}"

                            DIS = f"[{self.CurrentIter:3}]" + f"TIME: {self.CNS.mem['KCNTOMS']['Val']:5}|"
                            # for _ in r.keys():
                            #     DIS += f"{r[_]:6} |"
                            # for _ in NetOut_dict.keys():
                            #     DIS += f"[{NetOut_dict[_]:0.4f}-{self.RLMem.int_mod_action[_]:4}]"
                            # for para, _ in zip(["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"], [0, 1, 2, 3, 4]):
                            #     DIS += f"| {para}: {self.old_cns[_]:5.2f} | {self.new_cns[_]:5.2f}"
                            print(DIS)

                            # Logger
                            TOOL.log_add(file_name=f"{self.name}.txt",
                                         ep=self.CurrentIter,
                                         ep_iter=ep_iter,
                                         x=self.old_cns)
                            ep_iter += 1

                        # ==================================================================================================
                        # Train

                        gamma = 0.98
                        lmbda = 0.95

                        # 1 .. 10
                        spy_batch, scomp_batch = self.RLMem.GetBatch()
                        # 2 .. 10 + (1 Last value)
                        spy_fin, scomp_fin = self.RLMem.GetFinBatch(
                            self.S_Py, self.S_Comp)

                        # Compute the advantage for each network
                        # for nubNet in range(0, 6):
                        for nubNet in range(0, self.LocalNet.NubNET):
                            # GAE
                            # r_dict[nubNet]: (5,) -> (5,1)
                            # Netout : (5,1)
                            # done_dict[nubNet]: (5,) -> (5,1)
                            # NOTE(review): the bootstrap term is multiplied by the
                            # stored done values; confirm RLMem.SaveDone keeps a
                            # continuation mask rather than the raw done flag.
                            td_target = torch.tensor(self.RLMem.list_reward_temp[nubNet], dtype=torch.float).view(t_max, 1) + \
                                        gamma * self.LocalNet.NET[nubNet].GetPredictCrticOut(spy_fin, scomp_fin) * \
                                        torch.tensor(self.RLMem.list_done_temp[nubNet], dtype=torch.float).view(t_max, 1)
                            delta = td_target - self.LocalNet.NET[
                                nubNet].GetPredictCrticOut(
                                    spy_batch, scomp_batch)
                            delta = delta.detach().numpy()

                            # Accumulate the discounted deltas from the last step backwards.
                            adv_list = []
                            adv_ = 0.0
                            for reward in delta[::-1]:
                                adv_ = gamma * adv_ * lmbda + reward[0]
                                adv_list.append([adv_])
                            adv_list.reverse()
                            adv = torch.tensor(adv_list, dtype=torch.float)

                            # Current actor output for the actions that were taken.
                            PreVal = self.LocalNet.NET[
                                nubNet].GetPredictActorOut(
                                    spy_batch, scomp_batch)
                            PreVal = PreVal.gather(
                                1,
                                torch.tensor(
                                    self.RLMem.list_action_temp[nubNet])
                            )  # PreVal_a
                            # TOOL.ALLP(PreVal, f"Preval {nubNet}")

                            # Compute the ratio: a/b == exp(log(a) - log(b))
                            # TOOL.ALLP(a_prob[nubNet], f"a_prob {nubNet}")
                            Preval_old_a_prob = torch.tensor(
                                self.RLMem.list_porb_action_temp[nubNet],
                                dtype=torch.float)
                            ratio = torch.exp(
                                torch.log(PreVal) -
                                torch.log(Preval_old_a_prob))
                            # TOOL.ALLP(ratio, f"ratio {nubNet}")

                            # surr1, 2
                            eps_clip = 0.1
                            surr1 = ratio * adv
                            surr2 = torch.clamp(ratio, 1 - eps_clip,
                                                1 + eps_clip) * adv

                            min_val = torch.min(surr1, surr2)
                            smooth_l1_loss = nn.functional.smooth_l1_loss(
                                self.LocalNet.NET[nubNet].GetPredictCrticOut(
                                    spy_batch, scomp_batch),
                                td_target.detach())

                            loss = -min_val + smooth_l1_loss

                            # Backpropagate through the local network, hand its
                            # gradients to the global network, step the optimizer
                            # and pull the updated global weights back.
                            self.LocalOPT.NETOPT[nubNet].zero_grad()
                            loss.mean().backward()
                            for global_param, local_param in zip(
                                    self.GlobalNet.NET[nubNet].parameters(),
                                    self.LocalNet.NET[nubNet].parameters()):
                                global_param._grad = local_param.grad
                            self.LocalOPT.NETOPT[nubNet].step()
                            self.LocalNet.NET[nubNet].load_state_dict(
                                self.GlobalNet.NET[nubNet].state_dict())

                            # TOOL.ALLP(advantage.mean())
                            # print(self.CurrentIter, 'AgentNub: ', nubNet,
                            #       'adv: ', adv.mean().item(), 'loss: ', loss.mean().item(),
                            #       '= - min_val(', min_val.mean().item(), ') + Smooth(', smooth_l1_loss.mean().item(), ')')

                print('DONE EP')
                break
Exemple #11
0
class Agent(mp.Process):
    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip,
                 Remote_port):
        """Build a worker process with its own copy of the global networks.

        Args:
            GlobalNet: container of the shared networks; its ``NET`` entries
                provide the initial weights and are handed to the optimiser
                box.
            MEM: shared memory object; a deep copy is kept in ``LocalMem``.
            CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded unchanged to
                the ``CNS`` constructor together with the process name.
        """
        mp.Process.__init__(self)

        # Network info: start every local network from the global weights.
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for net_idx in range(self.LocalNet.NubNET):
            shared_weights = self.GlobalNet.NET[net_idx].state_dict()
            self.LocalNet.NET[net_idx].load_state_dict(shared_weights)
        # The optimiser box is built over the *global* networks.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET,
                                  NET=self.GlobalNet.NET)

        # CNS simulator interface
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)

        # Shared memory and a private snapshot of it
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)

        # Work info
        self.W = Work_info()
        print(f'Make -- {self}')

    # ==============================================================================================================
    # 제어 신호 보내는 파트
    def send_action_append(self, pa, va):
        for _ in range(len(pa)):
            self.para.append(pa[_])
            self.val.append(va[_])

    def send_action(self, act):
        # 전송될 변수와 값 저장하는 리스트
        self.para = []
        self.val = []

        # 최종 파라메터 전송
        self.CNS._send_control_signal(self.para, self.val)

    #
    # ==============================================================================================================
    # 입력 출력 값 생성
    def InitialStateSet(self):
        self.PhyPara = ['ZINST58', 'ZINST63']
        self.PhyState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.PhyPara}

        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {
            _: deque(maxlen=self.W.TimeLeg)
            for _ in self.COMPPara
        }

    def MakeStateSet(self):
        # 값을 쌓음 (return Dict)
        [
            self.PhyState[_].append(
                self.PreProcessing(_, self.CNS.mem[_]['Val']))
            for _ in self.PhyPara
        ]
        [
            self.COMPState[_].append(
                self.PreProcessing(_, self.CNS.mem[_]['Val']))
            for _ in self.COMPPara
        ]

        # Tensor로 전환
        self.S_Py = torch.tensor([self.PhyState[key] for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0],
                                      self.S_Py.shape[1])
        self.S_Comp = torch.tensor(
            [self.COMPState[key] for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0],
                                          self.S_Comp.shape[1])

        # Old 1개 리스트
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        """Scale a raw reading of ``para``; unknown parameters pass through.

        Returns:
            ``val / 1000`` for ``'ZINST58'`` and ``val / 100`` for
            ``'ZINST63'``, both rounded to 7 decimals; otherwise ``val``
            unchanged.
        """
        if para == 'ZINST58':  # pressurizer pressure
            return round(val / 1000, 7)
        if para == 'ZINST63':  # pressurizer level
            return round(val / 100, 7)
        return val

    # ==============================================================================================================

    def run(self):
        """Process entry point: sample from the CNS simulator and train.

        Every pass of the outer loop re-initialises the simulator, takes an
        iteration number from the shared ``mem['Iter']`` counter and fills
        the state window with ``W.TimeLeg`` simulator steps.  It then runs
        15 rounds of 5 sampling steps.  After each round every network is
        trained with an actor-critic loss: the local gradients are copied
        onto the matching global network parameters, the optimiser steps,
        and the local network reloads the global weights.  The method never
        returns.

        The reward bookkeeping indexes ``S_ONE_Py`` and a two-entry reward
        dict by network number, so it presumably expects exactly two
        networks (NOTE(review): confirm ``NETBOX.NubNET == 2``).
        """
        n_rounds = 15  # training rounds per episode
        n_steps = 5  # simulator steps sampled per round
        tol = 0.0001  # half-width of the band that counts as a correct prediction
        gamma = 0.98  # discount factor

        while True:
            self.CNS.init_cns(initial_nub=1)
            print('DONE initial')
            time.sleep(1)
            # self.CNS._send_malfunction_signal(12, 100100, 15)
            # time.sleep(1)

            # Take this episode's iteration number from the shared counter.
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial
            done = False
            self.InitialStateSet()

            while not done:
                # Fill the history window before sampling starts.
                for _ in range(self.W.TimeLeg):
                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                for _round in range(n_rounds):
                    spy_lst, scomp_lst = [], []
                    r_dict = {idx: [] for idx in range(self.LocalNet.NubNET)}

                    # Sampling
                    for _step in range(n_steps):
                        # Scalar output of every network for the current state.
                        net_out = {}
                        for nubNet in range(self.LocalNet.NubNET):
                            out = self.LocalNet.NET[nubNet].GetPredictActorOut(
                                x_py=self.S_Py, x_comp=self.S_Comp)
                            net_out[nubNet] = out.tolist()[0][0]  # (1, 1) -> ()

                        # Store the state the outputs were computed from.
                        spy_lst.append(self.S_Py.tolist()[0])
                        scomp_lst.append(self.S_Comp.tolist()[0])

                        # Predicted next value = current value + network output.
                        old_before = {0: 0, 1: 0}
                        for nubNet in range(self.LocalNet.NubNET):
                            old_before[nubNet] = (self.S_ONE_Py[nubNet] +
                                                  net_out[nubNet])

                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()

                        # Reward 1 when the prediction lies within +/- tol of
                        # the value measured after the step, else 0.
                        r = {0: 0, 1: 0}
                        for nub_val in range(2):
                            actual = self.S_ONE_Py[nub_val]
                            hit = actual - tol < old_before[nub_val] < actual + tol
                            r[nub_val] = 1 if hit else 0

                        # Combined indicator (only printed).  The rewards are
                        # 0 or 1, so they are compared against 1; the former
                        # comparison against 0.1 could never succeed.
                        t_r = 0.1 if r[0] == 1 and r[1] == 1 else -0.1

                        # Store the reward of every network separately.
                        for nubNet in range(self.LocalNet.NubNET):
                            r_dict[nubNet].append(r[nubNet])

                        print(self.CurrentIter, net_out,
                              self.S_ONE_Py[0] - tol, old_before[0],
                              self.S_ONE_Py[0], self.S_ONE_Py[0] + tol, '|',
                              self.S_ONE_Py[1] - tol, old_before[1],
                              self.S_ONE_Py[1], self.S_ONE_Py[1] + tol, '|',
                              r[0], r[1], t_r)

                    # ==========================================================
                    # Train
                    spy_fin = self.S_Py  # state after the last sampled step
                    scomp_fin = self.S_Comp
                    spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                    scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)

                    # Compute the advantage and update each network in turn.
                    for nubNet in range(self.LocalNet.NubNET):
                        local_net = self.LocalNet.NET[nubNet]

                        # Bootstrap from the critic's value of the final state.
                        R = 0.0 if done else local_net.GetPredictCrticOut(
                            spy_fin, scomp_fin).item()
                        td_target_lst = []
                        for reward in reversed(r_dict[nubNet]):
                            R = gamma * R + reward
                            td_target_lst.append([R])
                        td_target_lst.reverse()

                        td_target = torch.tensor(td_target_lst)
                        value = local_net.GetPredictCrticOut(
                            spy_batch, scomp_batch)
                        advantage = td_target - value

                        PreVal = local_net.GetPredictActorOut(
                            spy_batch, scomp_batch)

                        loss = -torch.log(PreVal) * advantage.detach() + \
                            nn.functional.smooth_l1_loss(
                                local_net.GetPredictCrticOut(
                                    spy_batch, scomp_batch),
                                td_target.detach())

                        self.LocalOPT.NETOPT[nubNet].zero_grad()
                        loss.mean().backward()
                        # Hand the local gradients to the global parameters
                        # before the optimiser step.
                        for global_param, local_param in zip(
                                self.GlobalNet.NET[nubNet].parameters(),
                                local_net.parameters()):
                            global_param._grad = local_param.grad
                        self.LocalOPT.NETOPT[nubNet].step()
                        # Refresh the local copy from the global weights.
                        local_net.load_state_dict(
                            self.GlobalNet.NET[nubNet].state_dict())

                        # TOOL.ALLP(advantage.mean())
                        print(self.CurrentIter, 'adv: ',
                              advantage.mean().item(), 'loss: ',
                              loss.mean().item())

                print('DONE EP')
                break
Exemple #12
0
class Agent(mp.Process):
    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip,
                 Remote_port):
        """Build a worker process with local networks and plotting figures.

        Args:
            GlobalNet: container of the shared networks; its ``NET`` entries
                provide the initial weights and are handed to the optimiser
                box.
            MEM: shared memory object; a deep copy is kept in ``LocalMem``.
            CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded unchanged to
                the ``CNS`` constructor together with the process name.
        """
        mp.Process.__init__(self)

        # Network info: start every local network from the global weights.
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for net_idx in range(self.LocalNet.NubNET):
            shared_weights = self.GlobalNet.NET[net_idx].state_dict()
            self.LocalNet.NET[net_idx].load_state_dict(shared_weights)
        # The optimiser box is built over the *global* networks.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET,
                                  NET=self.GlobalNet.NET)

        # CNS simulator interface
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)

        # Shared memory and a private snapshot of it
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)

        # Work info
        self.W = Work_info()

        # Plot setting: one figure and one axes per plotted parameter.
        plot_names = ("ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145")
        figures = {}
        for name in plot_names:
            figures[name] = plt.figure(figsize=(13, 13))
        self.fig_dict = figures
        axes = {}
        for name in plot_names:
            axes[name] = self.fig_dict[name].add_subplot()
        self.ax_dict = axes
        print(f'Make -- {self}')

    # ==============================================================================================================
    # 제어 신호 보내는 파트
    def send_action_append(self, pa, va):
        for _ in range(len(pa)):
            self.para.append(pa[_])
            self.val.append(va[_])

    def send_action(self, act):
        # 전송될 변수와 값 저장하는 리스트
        self.para = []
        self.val = []

        # 최종 파라메터 전송
        self.CNS._send_control_signal(self.para, self.val)

    #
    # ==============================================================================================================
    # 입력 출력 값 생성
    def InitialStateSet(self):
        self.PhyPara = ['ZINST58', 'ZINST63', 'ZVCT']
        self.PhyState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.PhyPara}

        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {
            _: deque(maxlen=self.W.TimeLeg)
            for _ in self.COMPPara
        }

    def MakeStateSet(self):
        # 값을 쌓음 (return Dict)
        [
            self.PhyState[_].append(
                self.PreProcessing(_, self.CNS.mem[_]['Val']))
            for _ in self.PhyPara
        ]
        [
            self.COMPState[_].append(
                self.PreProcessing(_, self.CNS.mem[_]['Val']))
            for _ in self.COMPPara
        ]

        # Tensor로 전환
        self.S_Py = torch.tensor([self.PhyState[key] for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0],
                                      self.S_Py.shape[1])
        self.S_Comp = torch.tensor(
            [self.COMPState[key] for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0],
                                          self.S_Comp.shape[1])

        # Old 1개 리스트
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        """Scale a raw reading of ``para``; unknown parameters pass through.

        Returns:
            ``val / 1000`` rounded to 6 decimals for ``'ZINST58'``,
            ``val / 100`` rounded to 6 decimals for ``'ZINST63'``,
            ``val / 100`` rounded to 5 decimals for ``'ZVCT'``;
            otherwise ``val`` unchanged.
        """
        if para == 'ZINST58':  # pressurizer pressure
            return round(val / 1000, 6)
        if para == 'ZINST63':  # pressurizer level
            return round(val / 100, 6)
        if para == 'ZVCT':  # VCT level
            return round(val / 100, 5)
        return val

    # ==============================================================================================================

    def run(self):
        """Process entry point: alternate prognostic-test and training episodes.

        Every pass of the outer loop resets the simulator with a randomly
        sized malfunction, takes an iteration number from the shared
        ``mem['Iter']`` counter and clears the plot axes.  Iterations that
        are a non-zero multiple of 15 run in *test mode*: the simulator is
        stepped ``fulltime * t_max`` times and, every 10 steps, the networks
        roll the current state window forward to the end of the episode; the
        measured and forecast curves are plotted and saved as
        ``"<iter>_<parameter>.png"``.  All other iterations run in *train
        mode*: ``fulltime`` rounds of ``t_max`` sampling steps, each round
        followed by a clipped-surrogate update of every network whose
        gradients are copied onto the global network.  The method never
        returns.

        NOTE(review): the train-mode update looks unfinished - ``a_prob`` is
        never filled and the stored actions are float arrays although they
        are used as a ``gather`` index.  Those parts are left as they were.
        """
        plot_names = ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"]
        fulltime = 15
        t_max = 5  # total iteration = fulltime * t_max
        tun = [1000, 100, 100, 1, 1]  # divisor applied to each recorded value
        ro = [1, 1, 1, 2, 2]  # rounding digits used for each parameter
        gamma = 0.98
        lmbda = 0.95
        eps_clip = 0.1

        while True:
            size, maltime = ran.randint(100, 600), ran.randint(30, 100) * 5
            self.CNS.reset(initial_nub=1,
                           mal=True,
                           mal_case=36,
                           mal_opt=size,
                           mal_time=maltime)
            print(f'DONE initial {size}, {maltime}')

            # Take this episode's iteration number from the shared counter.
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            # Diagnosis-module tester: every 15th iteration (except 0).
            if self.CurrentIter != 0 and self.CurrentIter % 15 == 0:
                print(self.CurrentIter, 'Yes Test')
                self.PrognosticMode = True
            else:
                print(self.CurrentIter, 'No Test')
                self.PrognosticMode = False

            # Initial
            done = False
            self.InitialStateSet()

            # Clear the curves drawn for the previous episode.
            for name in plot_names:
                self.ax_dict[name].clear()

            while not done:
                # Measured history of every plotted parameter (reset here).
                ProgRecodBox = {name: [] for name in plot_names}

                if self.PrognosticMode:
                    # ------------------------------------------------------
                    # Test mode
                    for _ in range(self.W.TimeLeg):
                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()
                        for name, scale, digits in zip(plot_names, tun, ro):
                            ProgRecodBox[name].append(
                                round(self.CNS.mem[name]['Val'], digits) /
                                scale)

                    for __ in range(fulltime * t_max):  # total iteration
                        if __ != 0 and __ % 10 == 0:  # forecast every 10 steps
                            # Start the forecast from the current windows.
                            # torch.cat below builds new tensors, so the
                            # originals are not modified.
                            copySPy, copySComp = self.S_Py, self.S_Comp
                            copyRecodBox = {name: [] for name in plot_names}

                            # The forecast horizon shrinks as time advances.
                            for _step in range(__, fulltime * t_max):
                                # Start of one forecast step.
                                save_ragular_para = {
                                    idx: 0
                                    for idx in range(self.LocalNet.NubNET)
                                }
                                for nubNet in range(self.LocalNet.NubNET):
                                    NetOut = self.LocalNet.NET[
                                        nubNet].GetPredictActorOut(
                                            x_py=copySPy, x_comp=copySComp)
                                    # Flatten and take the index of the
                                    # largest output.
                                    act_ = NetOut.view(-1).argmax().item()
                                    # Turn the index into a signed increment.
                                    if nubNet < 4:
                                        save_ragular_para[nubNet] = (
                                            act_ - 10) / 10
                                    else:
                                        save_ragular_para[nubNet] = (
                                            act_ - 100) / 100

                                # Extend copySPy: last column plus the
                                # predicted increments, then drop the oldest
                                # column so the window length is unchanged.
                                copySPyLastVal = copySPy[:, :, -1:]
                                copySPyLastVal = copySPyLastVal + torch.tensor(
                                    [[[save_ragular_para[0] / 1000],
                                      [save_ragular_para[1] / 100],
                                      [save_ragular_para[2] / 100]]])
                                copySPy = torch.cat((copySPy, copySPyLastVal),
                                                    dim=2)
                                copySPy = copySPy[:, :, 1:]

                                # Extend copySComp: unlike copySPy, the new
                                # column is the predicted value itself, not
                                # previous value + increment.
                                copySCompLastVal = torch.tensor(
                                    [[[save_ragular_para[3]],
                                      [save_ragular_para[4]]]])
                                copySComp = torch.cat(
                                    (copySComp, copySCompLastVal), dim=2)
                                copySComp = copySComp[:, :, 1:]

                                # Record the results of this forecast step.
                                copyRecodBox["ZINST58"].append(
                                    copySPyLastVal[0, 0, 0].item())
                                copyRecodBox["ZINST63"].append(
                                    copySPyLastVal[0, 1, 0].item())
                                copyRecodBox["ZVCT"].append(
                                    copySPyLastVal[0, 2, 0].item())
                                # NOTE(review): these read the *first* column
                                # of the shifted window, not the newly
                                # appended one - confirm this is intended.
                                copyRecodBox["BFV122"].append(
                                    copySComp[0, 0, 0].item())
                                copyRecodBox["BPV145"].append(
                                    copySComp[0, 1, 0].item())

                            # Forecast finished: plot measured + forecast.
                            for name in plot_names:
                                self.ax_dict[name].plot(
                                    ProgRecodBox[name] + copyRecodBox[name],
                                    label=f"{name}_{__}")

                        # plt.show()
                        # CNS + 1 step
                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()
                        for name, scale, digits in zip(plot_names, tun, ro):
                            ProgRecodBox[name].append(
                                round(self.CNS.mem[name]['Val'], digits) /
                                scale)

                    # End of test mode: decorate and save every figure.
                    for name in plot_names:
                        self.ax_dict[name].grid()
                    for name in plot_names:
                        self.ax_dict[name].legend()
                    for name in plot_names:
                        self.fig_dict[name].savefig(
                            f"{self.CurrentIter}_{name}.png")
                    print('END TEST')

                else:
                    # ------------------------------------------------------
                    # Train mode
                    for _ in range(self.W.TimeLeg):
                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()

                    for __ in range(fulltime):
                        spy_lst, scomp_lst = [], []
                        net_ids = range(self.LocalNet.NubNET)
                        a_dict = {idx: [] for idx in net_ids}
                        mu_dict = {idx: [] for idx in net_ids}
                        # NOTE(review): a_prob is never filled below.
                        a_prob = {idx: [] for idx in net_ids}
                        r_dict = {idx: [] for idx in net_ids}
                        done_dict = {idx: [] for idx in net_ids}

                        # Sampling
                        for t in range(t_max):
                            NetOut_dict = {idx: 0 for idx in net_ids}
                            # TODO: networks 0 and 2 are the actor nets.
                            for nubNet in [0, 2]:
                                TOOL.ALLP(self.S_Py, 'S_Py')
                                TOOL.ALLP(self.S_Comp, 'S_Comp')
                                mu_v = self.LocalNet.NET[
                                    nubNet].GetPredictActorOut(
                                        x_py=self.S_Py, x_comp=self.S_Comp)
                                mu = mu_v.data.numpy()
                                TOOL.ALLP(mu, "Mu")
                                # Select the action: mean plus Gaussian noise
                                # scaled by exp(logstd), clipped to [0, 1].
                                logstd = self.LocalNet.NET[
                                    nubNet].logstd.data.numpy()
                                act = mu + np.exp(logstd) * np.random.normal(
                                    size=logstd.shape)
                                act = np.clip(act, 0, 1)
                                TOOL.ALLP(act, "ACT")
                                # Store the action and the mean.
                                a_dict[nubNet].append(act)
                                mu_dict[nubNet].append(mu)
                                NetOut_dict[nubNet] = act[0]

                            # Store the state the actions were drawn from.
                            spy_lst.append(self.S_Py.tolist()[0])
                            scomp_lst.append(self.S_Comp.tolist()[0])

                            # Old values, kept to compare with the new ones.
                            self.old_cns = {
                                para: round(self.CNS.mem[para]['Val'], pr)
                                for para, pr in zip(plot_names, ro)
                            }

                            # CNS + 1 step
                            self.CNS.run_freeze_CNS()
                            self.MakeStateSet()
                            self.new_cns = {
                                para: round(self.CNS.mem[para]['Val'], pr)
                                for para, pr in zip(plot_names, ro)
                            }

                            # Reward and termination bookkeeping.  Both dicts
                            # cover every index used below and in the debug
                            # print (the print reads indices up to 5).
                            n_slots = max(self.LocalNet.NubNET, 6)
                            r = {idx: 0 for idx in range(n_slots)}
                            pa = {idx: 0 for idx in range(n_slots)}

                            for nubNet in net_ids:
                                if nubNet == 0 or nubNet == 1:
                                    # TODO: work continues from here
                                    # (placeholder reward).
                                    r[nubNet] = 1
                                r_dict[nubNet].append(r[nubNet])

                                # Termination: the last step of the last round
                                # is marked 0, every other step 1.
                                if __ == fulltime - 1 and t == t_max - 1:
                                    done_dict[nubNet].append(0)
                                    done = True
                                else:
                                    done_dict[nubNet].append(1)

                            # Debug print.  Network outputs may be scalars or
                            # numpy vectors, so they go through array2string
                            # rather than a float format spec.
                            reward_txt = ''.join(f"{r[idx]:4}|"
                                                 for idx in sorted(r))
                            netout_txt = ' '.join(
                                np.array2string(np.asarray(NetOut_dict[idx],
                                                           dtype=float),
                                                precision=4)
                                for idx in sorted(NetOut_dict))
                            print(
                                self.CurrentIter,
                                reward_txt,
                                netout_txt,
                                f"TIME: {self.CNS.mem['KCNTOMS']['Val']:5}",
                                f"VCT Level: {self.new_cns['ZVCT']}",
                                f"{self.old_cns['ZVCT'] + pa[1]:5.2f} + {pa[1]:5.2f}",
                                f"PZR pre: {self.new_cns['ZINST58']}",
                                f"{self.old_cns['ZINST58'] + pa[2]:5.2f} + {pa[2]:5.2f}",
                                f"PZR Level: {self.new_cns['ZINST63']}",
                                f"{self.old_cns['ZINST63'] + pa[3]:5.2f} + {pa[3]:5.2f}",
                                f"BFV122: {self.new_cns['BFV122']}",
                                f"{self.new_cns['BFV122'] + pa[4]:5.2f} + {pa[4]:5.2f}",
                                f"BPV145: {self.new_cns['BPV145']}",
                                f"{self.new_cns['BPV145'] + pa[5]:5.2f} + {pa[5]:5.2f}",
                            )

                        # ======================================================
                        # Train

                        # States 1 .. t_max
                        spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                        scomp_batch = torch.tensor(scomp_lst,
                                                   dtype=torch.float)
                        # States 2 .. t_max plus the state after the last step
                        spy_lst.append(self.S_Py.tolist()[0])
                        scomp_lst.append(self.S_Comp.tolist()[0])
                        spy_fin = torch.tensor(spy_lst[1:], dtype=torch.float)
                        scomp_fin = torch.tensor(scomp_lst[1:],
                                                 dtype=torch.float)

                        # Compute the advantage and update each network.
                        for nubNet in net_ids:
                            local_net = self.LocalNet.NET[nubNet]

                            # GAE: rewards and done flags are reshaped from
                            # (t_max,) to (t_max, 1).
                            rewards = torch.tensor(
                                r_dict[nubNet],
                                dtype=torch.float).view(t_max, 1)
                            not_done = torch.tensor(
                                done_dict[nubNet],
                                dtype=torch.float).view(t_max, 1)
                            td_target = rewards + gamma * \
                                local_net.GetPredictCrticOut(
                                    spy_fin, scomp_fin) * not_done
                            delta = td_target - local_net.GetPredictCrticOut(
                                spy_batch, scomp_batch)
                            delta = delta.detach().numpy()

                            adv_list = []
                            adv_ = 0.0
                            for step_delta in delta[::-1]:
                                adv_ = gamma * adv_ * lmbda + step_delta[0]
                                adv_list.append([adv_])
                            adv_list.reverse()
                            adv = torch.tensor(adv_list, dtype=torch.float)

                            PreVal = local_net.GetPredictActorOut(
                                spy_batch, scomp_batch)
                            # NOTE(review): a_dict holds float arrays (and is
                            # empty for nets other than 0 and 2); gather
                            # presumably needs an integer index - confirm.
                            PreVal = PreVal.gather(
                                1, torch.tensor(a_dict[nubNet]))  # PreVal_a

                            # Ratio a / b computed as exp(log(a) - log(b)).
                            Preval_old_a_prob = torch.tensor(
                                a_prob[nubNet], dtype=torch.float)
                            ratio = torch.exp(
                                torch.log(PreVal) -
                                torch.log(Preval_old_a_prob))

                            # Clipped surrogate objective.
                            surr1 = ratio * adv
                            surr2 = torch.clamp(ratio, 1 - eps_clip,
                                                1 + eps_clip) * adv

                            min_val = torch.min(surr1, surr2)
                            smooth_l1_loss = nn.functional.smooth_l1_loss(
                                local_net.GetPredictCrticOut(
                                    spy_batch, scomp_batch),
                                td_target.detach())

                            loss = -min_val + smooth_l1_loss

                            self.LocalOPT.NETOPT[nubNet].zero_grad()
                            loss.mean().backward()
                            # Hand the local gradients to the global
                            # parameters before the optimiser step.
                            for global_param, local_param in zip(
                                    self.GlobalNet.NET[nubNet].parameters(),
                                    local_net.parameters()):
                                global_param._grad = local_param.grad
                            self.LocalOPT.NETOPT[nubNet].step()
                            # Refresh the local copy from the global weights.
                            local_net.load_state_dict(
                                self.GlobalNet.NET[nubNet].state_dict())

                print('DONE EP')
                break
Exemple #13
0
class Agent(mp.Process):
    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip,
                 Remote_port):
        """Build one worker process with its own copy of the global networks.

        Args:
            GlobalNet: Shared network container; its ``NET`` entries provide
                the initial weights and are the parameters handed to the
                optimizer box.
            MEM: Shared memory object; kept as ``self.mem`` and deep-copied
                into ``self.LocalMem``.
            CNS_ip, CNS_port, Remote_ip, Remote_port: Passed through
                unchanged to the ``CNS`` constructor together with the
                process name.
        """
        super().__init__()

        # Networks: start the local copies from the current global weights.
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for net_idx in range(self.LocalNet.NubNET):
            global_weights = self.GlobalNet.NET[net_idx].state_dict()
            self.LocalNet.NET[net_idx].load_state_dict(global_weights)
        # The optimizers are created over the *global* networks.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET,
                                  NET=self.GlobalNet.NET)

        # CNS interface, labelled with this process' name.
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)

        # Shared memory handle plus a private snapshot of it.
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)

        # Work settings (run() reads TimeLeg from here).
        self.W = Work_info()

        print(f'Make -- {self}')

    # ==============================================================================================================
    # Control-signal sending part
    def send_action_append(self, pa, va):
        for _ in range(len(pa)):
            self.para.append(pa[_])
            self.val.append(va[_])

    def send_action(self, act):
        # 전송될 변수와 값 저장하는 리스트
        self.para = []
        self.val = []

        # 최종 파라메터 전송
        self.CNS._send_control_signal(self.para, self.val)

    #
    # ==============================================================================================================
    # Build input/output values
    def InitialStateSet(self):
        self.PhyPara = ['ZINST58', 'ZINST63', 'ZVCT']
        self.PhyState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.PhyPara}

        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {
            _: deque(maxlen=self.W.TimeLeg)
            for _ in self.COMPPara
        }

    def MakeStateSet(self):
        # 값을 쌓음 (return Dict)
        [
            self.PhyState[_].append(
                self.PreProcessing(_, self.CNS.mem[_]['Val']))
            for _ in self.PhyPara
        ]
        [
            self.COMPState[_].append(
                self.PreProcessing(_, self.CNS.mem[_]['Val']))
            for _ in self.COMPPara
        ]

        # Tensor로 전환
        self.S_Py = torch.tensor([self.PhyState[key] for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0],
                                      self.S_Py.shape[1])
        self.S_Comp = torch.tensor(
            [self.COMPState[key] for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0],
                                          self.S_Comp.shape[1])

        # Old 1개 리스트
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        """Scale a raw CNS reading according to its parameter name.

        ``ZINST58`` is divided by 1000, ``ZINST63`` and ``ZVCT`` by 100, each
        rounded to 7 decimal places. Any other parameter is returned
        unchanged.

        Args:
            para: Parameter name.
            val: Raw value read for that parameter.

        Returns:
            The scaled (or untouched) value.
        """
        if para == 'ZINST58':
            # Pressurizer pressure
            return round(val / 1000, 7)
        if para in ('ZINST63', 'ZVCT'):
            # Pressurizer level / VCT level
            return round(val / 100, 7)
        return val

    # ==============================================================================================================

    def run(self):
        """Process entry point: run CNS episodes forever and train on them.

        One pass of the outer ``while True`` loop is one episode:

        1. Reset the CNS with malfunction case 36, using a random
           ``mal_opt`` (``size``) and a random ``mal_time`` (``maltime``).
        2. Read an iteration number from ``self.mem['Iter']`` and bump it.
        3. Fill the state windows with ``self.W.TimeLeg`` simulator steps.
        4. Repeat 15 times: sample ``t_max`` (5) steps from every network,
           then perform one clipped-surrogate (PPO-style) update per
           network. Gradients computed on the local network are copied onto
           the matching global network, ``self.LocalOPT`` steps, and the
           local network is re-synchronised from the global one.

        Network 0 is rewarded for flagging the malfunction only once
        ``KCNTOMS`` has reached ``maltime``; network 1 is rewarded when its
        output added to the previous ``ZVCT`` equals the new ``ZVCT``.

        The method never returns.
        """
        while True:
            # Random malfunction size and injection time for this episode.
            size, maltime = ran.randint(100, 600), ran.randint(30, 100) * 5
            self.CNS.reset(initial_nub=1,
                           mal=True,
                           mal_case=36,
                           mal_opt=size,
                           mal_time=maltime)
            print(f'DONE initial {size}, {maltime}')

            # Get iter
            # NOTE(review): read-then-increment is two separate operations;
            # if self.mem is shared between worker processes two workers may
            # obtain the same number -- confirm whether that matters.
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial
            done = False
            self.InitialStateSet()

            # NOTE(review): the unconditional `break` at the bottom means this
            # loop body executes exactly once per episode, whatever `done` is.
            while not done:
                # Warm-up: step the simulator TimeLeg times so every history
                # window is full before the first network call.
                for t in range(self.W.TimeLeg):
                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                # 15 sample-then-train rounds per episode.
                for __ in range(15):
                    # Per-round rollout buffers. a_lst and r_lst are not used
                    # anywhere below; the per-network dicts are used instead.
                    spy_lst, scomp_lst, a_lst, r_lst = [], [], [], []
                    a_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                    a_now = {_: 0 for _ in range(self.LocalNet.NubNET)}
                    a_prob = {_: [] for _ in range(self.LocalNet.NubNET)}
                    r_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                    done_dict = {_: [] for _ in range(self.LocalNet.NubNET)}
                    # Sampling
                    t_max = 5
                    for t in range(t_max):
                        # Output kept per network for this step (the sampled
                        # action's probability for net 0, the first output
                        # element for the others).
                        NetOut_dict = {
                            _: 0
                            for _ in range(self.LocalNet.NubNET)
                        }
                        for nubNet in range(0, self.LocalNet.NubNET):
                            # TOOL.ALLP(self.S_Py, 'S_Py')
                            # TOOL.ALLP(self.S_Comp, 'S_Comp')
                            NetOut = self.LocalNet.NET[
                                nubNet].GetPredictActorOut(x_py=self.S_Py,
                                                           x_comp=self.S_Comp)
                            NetOut = NetOut.view(-1)  # (1, 2) -> (2, )
                            # TOOL.ALLP(NetOut, 'Netout before Categorical')

                            if nubNet == 0:
                                # Net 0 is discrete: sample an action index
                                # and keep the probability of that action.
                                act = torch.distributions.Categorical(
                                    NetOut).sample().item(
                                    )  # sample one action index; .item() yields a Python int
                                # TOOL.ALLP(act, 'act')
                                NetOut = NetOut.tolist()[act]
                                # TOOL.ALLP(NetOut, 'NetOut')
                            else:
                                # Other nets: no sampling, the first output
                                # element is used directly as the value.
                                act = 0
                                NetOut = NetOut[0].item()
                            NetOut_dict[nubNet] = NetOut
                            # TOOL.ALLP(NetOut_dict, f'NetOut{nubNet}')

                            a_now[nubNet] = act
                            a_dict[nubNet].append([act])
                            a_prob[nubNet].append([NetOut])

                        # Store the state the networks just acted on, with
                        # the leading batch dimension of 1 dropped.
                        spy_lst.append(
                            self.S_Py.tolist()[0])  # (1, n_phy, window) -list> (n_phy, window)
                        scomp_lst.append(self.S_Comp.tolist()
                                         [0])  # (1, n_comp, window) -list> (n_comp, window)

                        # old val to compare the new val
                        ComparedPara = ["ZVCT"]
                        self.old_cns = {
                            para: round(self.CNS.mem[para]['Val'], 2)
                            for para in ComparedPara
                        }

                        # CNS + 1 Step
                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()
                        self.new_cns = {
                            para: round(self.CNS.mem[para]['Val'], 2)
                            for para in ComparedPara
                        }

                        # Compute rewards and the termination condition
                        # NOTE(review): r is hard-coded for exactly two
                        # networks (keys 0 and 1).
                        r = {0: 0, 1: 0}
                        for nubNet in range(
                                0, self.LocalNet.NubNET):  # compute and store the reward per network
                            if nubNet == 0:
                                # Before maltime the correct answer is
                                # action 0; from maltime on it is action 1.
                                if self.CNS.mem['KCNTOMS']['Val'] < maltime:
                                    if a_now[nubNet] == 1:  # Malfunction
                                        r[nubNet] = -1
                                    else:
                                        r[nubNet] = 1
                                else:
                                    if a_now[nubNet] == 1:  # Malfunction
                                        r[nubNet] = 1
                                    else:
                                        r[nubNet] = -1
                            else:
                                # NOTE(review): exact float equality between a
                                # rounded reading plus a raw network output
                                # and another rounded reading -- this will
                                # rarely be True; confirm it is intended.
                                if self.old_cns["ZVCT"] + NetOut_dict[
                                        1] == self.new_cns["ZVCT"]:
                                    r[nubNet] = 1
                                else:
                                    r[nubNet] = -1

                            r_dict[nubNet].append(r[nubNet])

                            # Termination condition. done_dict stores a
                            # continuation mask: 0 on the very last step of
                            # the episode (no bootstrapping), 1 otherwise.
                            if __ == 14 and t == t_max - 1:
                                done_dict[nubNet].append(0)
                                done = True
                            else:
                                done_dict[nubNet].append(1)

                        # Small formatter for the progress line below
                        # (re-defined on every step).
                        def dp_want_val(val, name):
                            return f"{name}: {self.CNS.mem[val]['Val']:3.4f}"

                        print(
                            self.CurrentIter,
                            f"{r[0]:3}|{r[1]:3}|",
                            f'{NetOut_dict[0]:0.4f}',
                            f"TIME: {self.CNS.mem['KCNTOMS']['Val']:5}",
                            dp_want_val('PVCT', 'VCT pressure'),
                            f"VCT Level: {self.new_cns['ZVCT']} "
                            f"{self.old_cns['ZVCT'] + NetOut_dict[1]:3.4f} + {NetOut_dict[1]:3.4f}",
                            dp_want_val('UPRT', 'PRT temp'),
                            dp_want_val('ZINST48', 'PRT pressure'),
                            # dp_want_val('ZINST36', 'Let-down flow'), dp_want_val('BFV122', 'Charging Valve pos'),
                            # dp_want_val('BPV145', 'Let-down Valve pos'),
                        )

                    # ==================================================================================================
                    # Train

                    gamma = 0.98  # discount factor
                    lmbda = 0.95  # GAE decay factor

                    # States the networks acted on: sampled steps 0 .. t_max-1
                    spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                    scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)
                    # Next states: sampled steps 1 .. t_max-1 plus the state
                    # reached after the last sampled step.
                    spy_lst.append(self.S_Py.tolist()[0])
                    scomp_lst.append(self.S_Comp.tolist()[0])
                    spy_fin = torch.tensor(spy_lst[1:], dtype=torch.float)
                    scomp_fin = torch.tensor(scomp_lst[1:], dtype=torch.float)

                    # Compute the advantage for each network
                    for nubNet in range(0, self.LocalNet.NubNET):
                        # GAE
                        # r_dict[nubNet]: (5,) -> (5,1)
                        # Netout : (5,1)
                        # done_dict[nubNet]: (5,) -> (5,1)
                        # TD target = r + gamma * V(next state) * mask
                        td_target = torch.tensor(r_dict[nubNet], dtype=torch.float).view(t_max, 1) + \
                                    gamma * self.LocalNet.NET[nubNet].GetPredictCrticOut(spy_fin, scomp_fin) * \
                                    torch.tensor(done_dict[nubNet], dtype=torch.float).view(t_max, 1)
                        # TD error = target - V(state); detached so the
                        # advantage carries no gradient.
                        delta = td_target - self.LocalNet.NET[
                            nubNet].GetPredictCrticOut(spy_batch, scomp_batch)
                        delta = delta.detach().numpy()

                        # Accumulate the TD errors backwards in time with
                        # decay gamma * lmbda, then restore time order.
                        adv_list = []
                        adv_ = 0.0
                        for reward in delta[::-1]:
                            adv_ = gamma * adv_ * lmbda + reward[0]
                            adv_list.append([adv_])
                        adv_list.reverse()
                        adv = torch.tensor(adv_list, dtype=torch.float)

                        # Current actor output on the sampled states; for
                        # net 0 pick the entry of the action that was taken.
                        PreVal = self.LocalNet.NET[nubNet].GetPredictActorOut(
                            spy_batch, scomp_batch)
                        if nubNet == 0:
                            PreVal = PreVal.gather(
                                1, torch.tensor(a_dict[nubNet]))  # PreVal_a
                        # TOOL.ALLP(PreVal, f"Preval {nubNet}")

                        # Ratio: a/b == exp(log(a) - log(b))
                        # NOTE(review): for nets other than 0, PreVal is not
                        # gathered, so this relies on the actor output having
                        # a shape compatible with a_prob, and on both values
                        # being positive for log() -- TODO confirm.
                        # TOOL.ALLP(a_prob[nubNet], f"a_prob {nubNet}")
                        Preval_old_a_prob = torch.tensor(a_prob[nubNet],
                                                         dtype=torch.float)
                        ratio = torch.exp(
                            torch.log(PreVal) - torch.log(Preval_old_a_prob))
                        # TOOL.ALLP(ratio, f"ratio {nubNet}")

                        # surr1, 2: unclipped and clipped surrogate terms
                        eps_clip = 0.1
                        surr1 = ratio * adv
                        surr2 = torch.clamp(ratio, 1 - eps_clip,
                                            1 + eps_clip) * adv

                        min_val = torch.min(surr1, surr2)
                        # Critic loss against the (detached) TD target.
                        smooth_l1_loss = nn.functional.smooth_l1_loss(
                            self.LocalNet.NET[nubNet].GetPredictCrticOut(
                                spy_batch, scomp_batch), td_target.detach())

                        loss = -min_val + smooth_l1_loss

                        # Backprop through the local network, hand the
                        # resulting gradients to the global network's
                        # parameters, step the optimizer, then pull the
                        # updated global weights back into the local copy.
                        self.LocalOPT.NETOPT[nubNet].zero_grad()
                        loss.mean().backward()
                        for global_param, local_param in zip(
                                self.GlobalNet.NET[nubNet].parameters(),
                                self.LocalNet.NET[nubNet].parameters()):
                            global_param._grad = local_param.grad
                        self.LocalOPT.NETOPT[nubNet].step()
                        self.LocalNet.NET[nubNet].load_state_dict(
                            self.GlobalNet.NET[nubNet].state_dict())

                        # TOOL.ALLP(advantage.mean())
                        # print(self.CurrentIter, 'AgentNub: ', nubNet,
                        #       'adv: ', adv.mean().item(), 'loss: ', loss.mean().item(),
                        #       '= - min_val(', min_val.mean().item(), ') + Smooth(', smooth_l1_loss.mean().item(), ')')

                # Episode finished; leave the inner loop and start a new one.
                print('DONE EP')
                break