class Agent(mp.Process):
    """Worker process that trains, and periodically tests, a set of networks on the CNS.

    Each episode resets the CNS with a random malfunction.  Every 30th
    iteration (except iteration 0) is a test ("prognostic") episode that rolls
    the networks forward on their own predictions and saves plots; every other
    episode is a training episode whose gradients are copied onto the global
    networks before the local networks are re-synchronised.

    NOTE(review): the source this class was recovered from had several
    expressions missing (syntax errors).  Every reconstructed expression is
    marked with a ``NOTE(review)`` comment and must be confirmed.
    """

    # Plant parameters that are recorded and plotted in test mode.
    _PLOT_PARAS = ("ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145")
    # Divisor / rounding digits applied to the raw values before plotting.
    _PLOT_DIVISORS = (1000, 100, 100, 1, 1)
    _PLOT_DIGITS = (2, 2, 2, 2, 2)
    # Networks 0..5 are sampled as categorical policies; networks 6 and 7 are
    # fitted by regression on the next physical / component state.
    _N_POLICY_NETS = 6
    _PY_REGRESSOR = 6
    _COMP_REGRESSOR = 7
    # parameter -> (divisor or None, rounding digits) used by PreProcessing.
    _SCALING = {
        'ZINST58': (1000, 5),  # pressurizer pressure
        'ZINST63': (100, 4),  # pressurizer level
        'ZVCT': (100, 4),  # VCT level
        'BFV122': (None, 2),  # BFV122 position
        'BPV145': (None, 2),  # BPV145 position
    }

    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        """Create the local networks, the CNS connection and the plot figures.

        Args:
            GlobalNet: container of the shared networks (``.NET`` is indexed).
            MEM: shared memory mapping; ``MEM['Iter']`` is the episode counter.
            CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded to ``CNS``.
        """
        mp.Process.__init__(self)
        # Network info: copy the global weights into the local networks.
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for idx in range(0, self.LocalNet.NubNET):
            self.LocalNet.NET[idx].load_state_dict(
                self.GlobalNet.NET[idx].state_dict())
        # NETOPTBOX receives the *global* networks.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET,
                                  NET=self.GlobalNet.NET)
        # CNS
        # NOTE(review): the first argument was missing in the source;
        # self.name (the Process name) is assumed -- confirm against CNS.
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)
        # SharedMem
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Work info
        self.W = Work_info()
        # One figure and one axes per plotted parameter.
        self.fig_dict = {
            name: plt.figure(figsize=(13, 13)) for name in self._PLOT_PARAS
        }
        self.ax_dict = {
            name: self.fig_dict[name].add_subplot() for name in self._PLOT_PARAS
        }
        print(f'Make -- {self}')

    # ==========================================================================
    # Control-signal part
    def send_action_append(self, pa, va):
        """Queue parameter names ``pa`` with their values ``va`` for sending."""
        for idx, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[idx])

    def send_action(self, act):
        """Reset the queued control signal and send it (currently always empty).

        ``act`` is accepted for interface compatibility and is not used.
        """
        # Lists holding the parameters and values to transmit.
        self.para = []
        self.val = []
        # Send the final parameters.
        self.CNS._send_control_signal(self.para, self.val)

    # ==========================================================================
    # Network input / output construction
    def InitialStateSet(self):
        """Create empty time-lagged windows for the physical and component states."""
        self.PhyPara = ['ZINST58', 'ZINST63', 'ZVCT']
        self.PhyState = {
            name: deque(maxlen=self.W.TimeLeg) for name in self.PhyPara
        }
        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {
            name: deque(maxlen=self.W.TimeLeg) for name in self.COMPPara
        }

    def MakeStateSet(self):
        """Append the current CNS values to the windows and rebuild the tensors.

        Sets ``S_Py`` / ``S_Comp`` (reshaped to ``(1, n_para, window)``) and
        ``S_ONE_Py`` / ``S_ONE_Comp`` (latest value of each parameter).
        """
        for name in self.PhyPara:
            self.PhyState[name].append(
                self.PreProcessing(name, self.CNS.mem[name]['Val']))
        for name in self.COMPPara:
            self.COMPState[name].append(
                self.PreProcessing(name, self.CNS.mem[name]['Val']))

        # Convert to tensors.
        self.S_Py = torch.tensor(
            [list(self.PhyState[key]) for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0],
                                      self.S_Py.shape[1])
        self.S_Comp = torch.tensor(
            [list(self.COMPState[key]) for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0],
                                          self.S_Comp.shape[1])

        # Latest single value per parameter.
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        """Scale and round ``val`` for parameter ``para``; unknown names pass through."""
        if para not in self._SCALING:
            return val
        divisor, digits = self._SCALING[para]
        if divisor is not None:
            val = val / divisor
        return round(val, digits)

    # ==========================================================================
    # Reward helpers
    @staticmethod
    def _diagnosis_reward(action, sim_time, mal_time):
        """Return +1 when ``action == 1`` agrees with ``sim_time >= mal_time``, else -1."""
        malfunction_started = not (sim_time < mal_time)
        return 1 if (action == 1) == malfunction_started else -1

    @staticmethod
    def _prediction_reward(actual, expected):
        """Return 1 on an exact match, otherwise the negative absolute error (3 digits)."""
        if actual == expected:
            return 1
        return round(-abs(actual - expected), 3)

    # ==========================================================================
    # Episode helpers
    def _append_plant_values(self, record):
        """Append the rounded, scaled current CNS value of every plotted parameter."""
        for name, divisor, digits in zip(self._PLOT_PARAS, self._PLOT_DIVISORS,
                                         self._PLOT_DIGITS):
            record[name].append(round(self.CNS.mem[name]['Val'], digits) / divisor)

    def _predict_future(self, start_step, total_steps, soft_mode=False):
        """Roll the networks forward on their own outputs and return the predictions.

        Starting from the current ``S_Py`` / ``S_Comp`` windows, one prediction
        is made per step in ``range(start_step, total_steps)``; each prediction
        is appended to the window and the oldest entry is dropped.

        Args:
            start_step: simulation step at which the roll-out starts.
            total_steps: total number of steps of the episode.
            soft_mode: when True every network is read through ``argmax`` and
                the physical prediction is added to the last value; when False
                network 6's raw output replaces the last physical value.

        Returns:
            dict mapping each plotted parameter to its list of predicted values.
        """
        py_hist, comp_hist = self.S_Py, self.S_Comp
        predicted = {name: [] for name in self._PLOT_PARAS}

        for _step in range(start_step, total_steps):
            net_out = {idx: 0 for idx in range(self.LocalNet.NubNET)}
            for idx in range(0, self.LocalNet.NubNET):
                out = self.LocalNet.NET[idx].GetPredictActorOut(
                    x_py=py_hist, x_comp=comp_hist)
                out = out.view(-1)
                if not soft_mode:
                    TOOL.ALLP(out, 'Net_out')
                if soft_mode or idx < self._N_POLICY_NETS:
                    # Index of the largest output, mapped to a signed change.
                    net_out[idx] = (out.argmax().item() - 100) / 100
                else:
                    # NOTE(review): the right-hand side was missing in the
                    # source; the raw network output as a numpy array is
                    # assumed because it is indexed as net_out[6][0..2] below.
                    net_out[idx] = out.detach().numpy()
                    TOOL.ALLP(net_out[idx], f'save_reagular_para{idx}')
            TOOL.ALLP(net_out, "save_ragular_para")

            # Physical window: take the last column and build the new one.
            last_py = py_hist[:, :, -1:]
            if soft_mode:
                add_val = torch.tensor([[[round(net_out[0] / 1000, 5)],
                                         [round(net_out[1] / 100, 4)],
                                         [round(net_out[2] / 100, 4)]]])
            else:
                regressed = net_out[self._PY_REGRESSOR]
                add_val = torch.tensor(
                    [[[regressed[0]], [regressed[1]], [regressed[2]]]],
                    dtype=torch.float)
            TOOL.ALLP(last_py, "copySPyLastVal")
            TOOL.ALLP(add_val, "add_val")
            last_py = last_py + add_val if soft_mode else add_val
            # NOTE(review): "torch.cat(" was missing in the source; it is
            # reconstructed from the remaining "(a, b), dim=2)" fragment.
            py_hist = torch.cat((py_hist, last_py), dim=2)
            # NOTE(review): the array passed to ALLP / np.around was missing in
            # the source; the numpy view of the window is assumed.
            TOOL.ALLP(py_hist.detach().numpy(), "COPYSPY")
            py_hist = np.around(py_hist.detach().numpy(), decimals=5)
            TOOL.ALLP(py_hist, "COPYSPY_Round")
            py_hist = torch.tensor(py_hist)
            py_hist = py_hist[:, :, 1:]  # drop the oldest column

            # Component window: the new column is built from networks 3 and 4.
            comp_last = torch.tensor([[[round(net_out[3], 2)],
                                       [round(net_out[4], 2)]]])
            comp_hist = torch.cat((comp_hist, comp_last), dim=2)  # NOTE(review): see above
            comp_hist = np.around(comp_hist.detach().numpy(), decimals=3)
            comp_hist = torch.tensor(comp_hist)
            comp_hist = comp_hist[:, :, 1:]  # drop the oldest column

            # Record the results.
            predicted["ZINST58"].append(last_py[0, 0, 0].item())
            predicted["ZINST63"].append(last_py[0, 1, 0].item())
            predicted["ZVCT"].append(last_py[0, 2, 0].item())
            # NOTE(review): these read the *first* column of the component
            # window, not the newly predicted one -- confirm this is intended.
            predicted["BFV122"].append(comp_hist[0, 0, 0].item())
            predicted["BPV145"].append(comp_hist[0, 1, 0].item())
        return predicted

    def _run_test_episode(self, fulltime, t_max):
        """Run one test episode: step the CNS, predict every 10 steps, save plots."""
        soft_mode = False
        record = {name: [] for name in self._PLOT_PARAS}

        # Fill the time-lagged windows.
        for _ in range(self.W.TimeLeg):
            self.CNS.run_freeze_CNS()
            self.MakeStateSet()
            self._append_plant_values(record)

        total_steps = fulltime * t_max
        for step in range(total_steps):
            if step != 0 and step % 10 == 0:
                # The prediction horizon shrinks as the episode advances.
                predicted = self._predict_future(step, total_steps, soft_mode)
                for name in self._PLOT_PARAS:
                    self.ax_dict[name].plot(record[name] + predicted[name],
                                            label=f"{name}_{step}")
            # CNS + 1 step
            self.CNS.run_freeze_CNS()
            self.MakeStateSet()
            self._append_plant_values(record)

        for name in self._PLOT_PARAS:
            self.ax_dict[name].grid()
            self.ax_dict[name].legend()
            self.fig_dict[name].savefig(f"{self.CurrentIter}_{name}.png")
        print('END TEST')

    def _push_gradients(self, idx, loss):
        """Back-propagate ``loss``, step the optimizer of network ``idx`` and re-sync it."""
        self.LocalOPT.NETOPT[idx].zero_grad()
        loss.backward()
        # Hand the local gradients to the global parameters.
        for global_param, local_param in zip(
                self.GlobalNet.NET[idx].parameters(),
                self.LocalNet.NET[idx].parameters()):
            global_param._grad = local_param.grad
        self.LocalOPT.NETOPT[idx].step()
        self.LocalNet.NET[idx].load_state_dict(
            self.GlobalNet.NET[idx].state_dict())

    def _train_networks(self, t_max, spy_lst, scomp_lst, a_dict, a_prob,
                        r_dict, done_dict, y_answer):
        """Update all networks from one batch of ``t_max`` sampled steps."""
        gamma = 0.98
        lmbda = 0.95
        eps_clip = 0.1

        # States 1..t_max.
        spy_batch = torch.tensor(spy_lst, dtype=torch.float)
        scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)
        # States 2..t_max plus the state reached after the last step.
        next_spy = spy_lst[1:] + [self.S_Py.tolist()[0]]
        next_scomp = scomp_lst[1:] + [self.S_Comp.tolist()[0]]
        spy_fin = torch.tensor(next_spy, dtype=torch.float)
        scomp_fin = torch.tensor(next_scomp, dtype=torch.float)

        # Policy networks: GAE advantage and clipped-surrogate (PPO-style) loss.
        for idx in range(0, self._N_POLICY_NETS):
            net = self.LocalNet.NET[idx]
            rewards = torch.tensor(r_dict[idx], dtype=torch.float).view(t_max, 1)
            not_done = torch.tensor(done_dict[idx],
                                    dtype=torch.float).view(t_max, 1)
            td_target = rewards + gamma * net.GetPredictCrticOut(
                spy_fin, scomp_fin) * not_done
            delta = td_target - net.GetPredictCrticOut(spy_batch, scomp_batch)
            delta = delta.detach().numpy()

            adv_list = []
            running = 0.0
            for step_delta in delta[::-1]:
                running = gamma * running * lmbda + step_delta[0]
                adv_list.append([running])
            adv_list.reverse()
            adv = torch.tensor(adv_list, dtype=torch.float)

            new_prob = net.GetPredictActorOut(spy_batch, scomp_batch)
            new_prob = new_prob.gather(1, torch.tensor(a_dict[idx]))
            old_prob = torch.tensor(a_prob[idx], dtype=torch.float)
            # a / b == exp(log(a) - log(b))
            ratio = torch.exp(torch.log(new_prob) - torch.log(old_prob))

            surr1 = ratio * adv
            surr2 = torch.clamp(ratio, 1 - eps_clip, 1 + eps_clip) * adv
            min_val = torch.min(surr1, surr2)
            smooth_l1_loss = nn.functional.smooth_l1_loss(
                net.GetPredictCrticOut(spy_batch, scomp_batch),
                td_target.detach())
            loss = -min_val + smooth_l1_loss
            self._push_gradients(idx, loss.mean())

        # Regression networks: MSE against the observed next values.
        for idx in (self._PY_REGRESSOR, self._COMP_REGRESSOR):
            y_predict_tensor = self.LocalNet.NET[idx].GetPredictActorOut(
                spy_batch, scomp_batch)
            y_answer_tensor = torch.tensor(y_answer[idx], dtype=torch.float)
            loss = nn.functional.mse_loss(y_predict_tensor, y_answer_tensor)
            self._push_gradients(idx, loss)

    def _run_train_episode(self, fulltime, t_max, maltime):
        """Run one training episode of ``fulltime`` batches of ``t_max`` steps.

        Args:
            fulltime: number of sample-then-train cycles.
            t_max: number of CNS steps sampled per cycle.
            maltime: malfunction time passed to the CNS reset; used by the
                reward of network 0.
        """
        # Fill the time-lagged windows.
        for _ in range(self.W.TimeLeg):
            self.CNS.run_freeze_CNS()
            self.MakeStateSet()

        net_ids = range(self.LocalNet.NubNET)
        compared_para = ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"]
        compared_round = [2, 2, 2, 2, 2]
        # Network index -> parameter whose next value it is rewarded on.
        want_para_reward = {
            1: "ZVCT",
            2: "ZINST58",
            3: "ZINST63",
            4: "BFV122",
            5: "BPV145"
        }

        for cycle in range(fulltime):
            spy_lst, scomp_lst = [], []
            a_dict = {idx: [] for idx in net_ids}
            a_now = {idx: 0 for idx in net_ids}
            a_prob = {idx: [] for idx in net_ids}
            r_dict = {idx: [] for idx in net_ids}
            done_dict = {idx: [] for idx in net_ids}
            y_predict = {idx: [] for idx in net_ids}
            y_answer = {idx: [] for idx in net_ids}

            # Sampling
            for t in range(t_max):
                NetOut_dict = {idx: 0 for idx in net_ids}
                for nubNet in net_ids:
                    NetOut = self.LocalNet.NET[nubNet].GetPredictActorOut(
                        x_py=self.S_Py, x_comp=self.S_Comp)
                    NetOut = NetOut.view(-1)
                    if nubNet < self._N_POLICY_NETS:
                        # Sample an action index from the output distribution.
                        act = torch.distributions.Categorical(
                            NetOut).sample().item()
                        NetOut = NetOut.tolist()[act]
                        NetOut_dict[nubNet] = NetOut
                        a_now[nubNet] = act
                        a_dict[nubNet].append([act])
                        a_prob[nubNet].append([NetOut])
                    else:
                        # NOTE(review): the appended expression was missing in
                        # the source; the raw output as numpy is assumed.
                        y_predict[nubNet].append(NetOut.detach().numpy())

                spy_lst.append(self.S_Py.tolist()[0])
                scomp_lst.append(self.S_Comp.tolist()[0])

                # Values before the step, to compare with the values after it.
                self.old_cns = {
                    para: round(self.CNS.mem[para]['Val'], digits)
                    for para, digits in zip(compared_para, compared_round)
                }

                # CNS + 1 step
                self.CNS.run_freeze_CNS()
                self.MakeStateSet()
                self.new_cns = {
                    para: round(self.CNS.mem[para]['Val'], digits)
                    for para, digits in zip(compared_para, compared_round)
                }

                y_answer_one = self.S_Py[:, :, -1:].data.reshape(3)
                y_answer[self._PY_REGRESSOR].append(y_answer_one.numpy())
                y_answer_one = self.S_Comp[:, :, -1:].data.reshape(2)
                y_answer[self._COMP_REGRESSOR].append(y_answer_one.numpy())

                # Rewards and termination flags, per policy network.
                r = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
                pa = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
                last_step = cycle == fulltime - 1 and t == t_max - 1
                for nubNet in range(0, self._N_POLICY_NETS):
                    if nubNet == 0:
                        r[nubNet] = self._diagnosis_reward(
                            a_now[nubNet], self.CNS.mem['KCNTOMS']['Val'],
                            maltime)
                    else:
                        predict_a = round((a_now[nubNet] - 100) / 100, 2)
                        pa[nubNet] = predict_a
                        para = want_para_reward[nubNet]
                        if nubNet < 4:
                            # Networks 1..3 predict a change of the old value.
                            expected = self.old_cns[para] + predict_a
                        else:
                            # Networks 4..5 are compared as absolute values.
                            expected = predict_a
                        r[nubNet] = self._prediction_reward(
                            self.new_cns[para], expected)
                    r_dict[nubNet].append(r[nubNet])
                    # 0 marks the terminal step (no bootstrapping), 1 otherwise.
                    done_dict[nubNet].append(0 if last_step else 1)

                print(
                    self.CurrentIter,
                    f"{r[0]:6}|{r[1]:6}|{r[2]:6}|{r[3]:6}|{r[4]:6}|{r[5]:6}|",
                    f'{NetOut_dict[0]:0.4f}',
                    f'{NetOut_dict[1]:0.4f}',
                    f'{NetOut_dict[2]:0.4f}',
                    f'{NetOut_dict[3]:0.4f}',
                    f'{NetOut_dict[4]:0.4f}',
                    f'{NetOut_dict[5]:0.4f}',
                    f"TIME: {self.CNS.mem['KCNTOMS']['Val']:5}",
                    f"VCT Level: {self.new_cns['ZVCT']}",
                    f"{self.old_cns['ZVCT'] + pa[1]:5.2f} + {pa[1]:5.2f}",
                    f"PZR pre: {self.new_cns['ZINST58']}",
                    f"{self.old_cns['ZINST58'] + pa[2]:5.2f} + {pa[2]:5.2f}",
                    f"PZR Level: {self.new_cns['ZINST63']}",
                    f"{self.old_cns['ZINST63'] + pa[3]:5.2f} + {pa[3]:5.2f}",
                    f"BFV122: {self.new_cns['BFV122']}",
                    f"{self.new_cns['BFV122'] + pa[4]:5.2f} + {pa[4]:5.2f}",
                    # Label fixed: this entry prints BPV145, not BFV122.
                    f"BPV145: {self.new_cns['BPV145']}",
                    f"{self.new_cns['BPV145'] + pa[5]:5.2f} + {pa[5]:5.2f}",
                )

            # Train on the batch that was just sampled.
            self._train_networks(t_max, spy_lst, scomp_lst, a_dict, a_prob,
                                 r_dict, done_dict, y_answer)

    # ==========================================================================
    def run(self):
        """Process entry point: run test or training episodes forever."""
        fulltime = 15
        t_max = 5  # total iterations per episode = fulltime * t_max
        while True:
            size, maltime = ran.randint(100, 600), ran.randint(30, 100) * 5
            self.CNS.reset(initial_nub=1,
                           mal=True,
                           mal_case=36,
                           mal_opt=size,
                           mal_time=maltime)
            print(f'DONE initial {size}, {maltime}')

            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1

            # Every 30th iteration (but not the first) is a test episode.
            if self.CurrentIter != 0 and self.CurrentIter % 30 == 0:
                print(self.CurrentIter, 'Yes Test')
                self.PrognosticMode = True
            else:
                print(self.CurrentIter, 'No Test')
                self.PrognosticMode = False

            # Initial
            self.InitialStateSet()
            # Clear the plots of the previous test episode.
            for name in self._PLOT_PARAS:
                self.ax_dict[name].clear()

            if self.PrognosticMode:
                self._run_test_episode(fulltime, t_max)
            else:
                self._run_train_episode(fulltime, t_max, maltime)
            print('DONE EP')
class Agent(mp.Process):
    """Worker process that trains malfunction-diagnosis networks on the CNS.

    Each episode resets the CNS with a random malfunction, samples one action
    per network and step, rewards the action against the malfunction time and
    applies an actor-critic update whose gradients are copied onto the global
    networks.
    """

    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        """Create the local networks and the CNS connection.

        Args:
            GlobalNet: container of the shared networks (``.NET`` is indexed).
            MEM: shared memory mapping; ``MEM['Iter']`` is the episode counter.
            CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded to ``CNS``.
        """
        mp.Process.__init__(self)
        # Network info: copy the global weights into the local networks.
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for idx in range(0, self.LocalNet.NubNET):
            self.LocalNet.NET[idx].load_state_dict(
                self.GlobalNet.NET[idx].state_dict())
        # NETOPTBOX receives the *global* networks.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET,
                                  NET=self.GlobalNet.NET)
        # CNS
        # NOTE(review): the first argument was missing in the source;
        # self.name (the Process name) is assumed -- confirm against CNS.
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)
        # SharedMem
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Work info
        self.W = Work_info()
        print(f'Make -- {self}')

    # ==========================================================================
    # Control-signal part
    def send_action_append(self, pa, va):
        """Queue parameter names ``pa`` with their values ``va`` for sending."""
        for idx, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[idx])

    def send_action(self, act):
        """Reset the queued control signal and send it (currently always empty).

        ``act`` is accepted for interface compatibility and is not used.
        """
        # Lists holding the parameters and values to transmit.
        self.para = []
        self.val = []
        # Send the final parameters.
        self.CNS._send_control_signal(self.para, self.val)

    # ==========================================================================
    # Network input / output construction
    def InitialStateSet(self):
        """Create empty time-lagged windows for the physical and component states."""
        self.PhyPara = ['ZINST58', 'ZINST63']
        self.PhyState = {
            name: deque(maxlen=self.W.TimeLeg) for name in self.PhyPara
        }
        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {
            name: deque(maxlen=self.W.TimeLeg) for name in self.COMPPara
        }

    def MakeStateSet(self):
        """Append the current CNS values to the windows and rebuild the tensors.

        Sets ``S_Py`` / ``S_Comp`` (reshaped to ``(1, n_para, window)``) and
        ``S_ONE_Py`` / ``S_ONE_Comp`` (latest value of each parameter).
        """
        for name in self.PhyPara:
            self.PhyState[name].append(
                self.PreProcessing(name, self.CNS.mem[name]['Val']))
        for name in self.COMPPara:
            self.COMPState[name].append(
                self.PreProcessing(name, self.CNS.mem[name]['Val']))

        # Convert to tensors.
        self.S_Py = torch.tensor(
            [list(self.PhyState[key]) for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0],
                                      self.S_Py.shape[1])
        self.S_Comp = torch.tensor(
            [list(self.COMPState[key]) for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0],
                                          self.S_Comp.shape[1])

        # Latest single value per parameter.
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        """Scale and round ``val`` for parameter ``para``; unknown names pass through."""
        if para == 'ZINST58':
            return round(val / 1000, 7)  # pressurizer pressure
        if para == 'ZINST63':
            return round(val / 100, 7)  # pressurizer level
        return val

    @staticmethod
    def _diagnosis_reward(action, sim_time, mal_time):
        """Return +1 when ``action == 1`` agrees with ``sim_time >= mal_time``, else -1."""
        malfunction_started = not (sim_time < mal_time)
        return 1 if (action == 1) == malfunction_started else -1

    # ==========================================================================
    def run(self):
        """Process entry point: run training episodes forever."""
        gamma = 0.98
        while True:
            size, maltime = ran.randint(100, 600), ran.randint(30, 100) * 5
            self.CNS.reset(initial_nub=1,
                           mal=True,
                           mal_case=36,
                           mal_opt=size,
                           mal_time=maltime)
            print(f'DONE initial {size}, {maltime}')

            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial.  ``done`` never becomes True in this version, so the
            # return below is always bootstrapped from the critic.
            done = False
            self.InitialStateSet()

            # Fill the time-lagged windows.
            for _ in range(self.W.TimeLeg):
                self.CNS.run_freeze_CNS()
                self.MakeStateSet()

            net_ids = range(self.LocalNet.NubNET)
            for _cycle in range(15):
                spy_lst, scomp_lst = [], []
                a_dict = {idx: [] for idx in net_ids}
                r_dict = {idx: [] for idx in net_ids}

                # Sampling
                for _t in range(5):
                    a_now = {}
                    NetOut = None
                    for nubNet in net_ids:
                        NetOut = self.LocalNet.NET[nubNet].GetPredictActorOut(
                            x_py=self.S_Py, x_comp=self.S_Comp)
                        NetOut = NetOut.view(-1)
                        # Sample an action index from the output distribution.
                        act = torch.distributions.Categorical(
                            NetOut).sample().item()
                        NetOut = NetOut.tolist()[act]
                        a_now[nubNet] = act
                        a_dict[nubNet].append([act])

                    spy_lst.append(self.S_Py.tolist()[0])
                    scomp_lst.append(self.S_Comp.tolist()[0])

                    # CNS + 1 step
                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                    # Reward every network on ITS OWN sampled action.  (The
                    # previous code reused the last network's action for all.)
                    r = {idx: 0 for idx in net_ids}
                    for nubNet in net_ids:
                        r[nubNet] = self._diagnosis_reward(
                            a_now[nubNet], self.CNS.mem['KCNTOMS']['Val'],
                            maltime)
                        r_dict[nubNet].append(r[nubNet])
                    # NetOut is the probability picked by the last network.
                    print(self.CurrentIter, r[0], NetOut)

                # Train
                spy_fin = self.S_Py  # state after the last step
                scomp_fin = self.S_Comp
                spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)

                for nubNet in net_ids:
                    net = self.LocalNet.NET[nubNet]
                    R = 0.0 if done else net.GetPredictCrticOut(
                        spy_fin, scomp_fin).item()
                    # Discounted return, built backwards from the last step.
                    td_target_lst = []
                    for reward in r_dict[nubNet][::-1]:
                        R = gamma * R + reward
                        td_target_lst.append([R])
                    td_target_lst.reverse()
                    td_target = torch.tensor(td_target_lst)

                    value = net.GetPredictCrticOut(spy_batch, scomp_batch)
                    advantage = td_target - value
                    PreVal = net.GetPredictActorOut(spy_batch, scomp_batch)
                    Preval_a = PreVal.gather(1, torch.tensor(a_dict[nubNet]))
                    critic_loss = nn.functional.smooth_l1_loss(
                        net.GetPredictCrticOut(spy_batch, scomp_batch),
                        td_target.detach())
                    loss = -torch.log(Preval_a) * advantage.detach() + critic_loss

                    self.LocalOPT.NETOPT[nubNet].zero_grad()
                    loss.mean().backward()
                    # Hand the local gradients to the global parameters.
                    for global_param, local_param in zip(
                            self.GlobalNet.NET[nubNet].parameters(),
                            net.parameters()):
                        global_param._grad = local_param.grad
                    self.LocalOPT.NETOPT[nubNet].step()
                    net.load_state_dict(
                        self.GlobalNet.NET[nubNet].state_dict())

                    print(self.CurrentIter, 'AgentNub: ', nubNet, 'adv: ',
                          advantage.mean().item(), 'loss: ',
                          loss.mean().item())
            print('DONE EP')
class Agent(mp.Process):
    """Worker process that trains a single ``PPOModel`` to predict the next plant state.

    The actor output is added to the latest physical values; after one CNS
    step the sum is compared with the newly observed values and rewarded when
    it lies within +/-0.0001 of them.  Gradients of the local model are copied
    onto the global model, whose parameters the optimizer updates.
    """

    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        """Create the local model, the optimizer and the CNS connection.

        Args:
            GlobalNet: the shared model; its parameters are optimized.
            MEM: shared memory mapping; ``MEM['Iter']`` is the episode counter.
            CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded to ``CNS``.
        """
        mp.Process.__init__(self)
        # Network info
        self.GlobalNet = GlobalNet
        self.LocalNet = PPOModel(nub_para=2, time_leg=10)
        self.LocalNet.load_state_dict(GlobalNet.state_dict())
        # ``learning_rate`` is expected to be defined at module level.
        self.optimizer = optim.Adam(GlobalNet.parameters(), lr=learning_rate)
        # CNS
        # NOTE(review): the first argument was missing in the source;
        # self.name (the Process name) is assumed -- confirm against CNS.
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)
        # SharedMem
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Work info
        self.W = Work_info()

    # ==========================================================================
    # Control-signal part
    def send_action_append(self, pa, va):
        """Queue parameter names ``pa`` with their values ``va`` for sending."""
        for idx, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[idx])

    def send_action(self, act):
        """Reset the queued control signal and send it (currently always empty).

        ``act`` is accepted for interface compatibility and is not used.
        """
        # Lists holding the parameters and values to transmit.
        self.para = []
        self.val = []
        # Send the final parameters.
        self.CNS._send_control_signal(self.para, self.val)

    # ==========================================================================
    # Network input / output construction
    def InitialStateSet(self):
        """Create empty time-lagged windows for the physical and component states."""
        self.PhyPara = ['ZINST58', 'ZINST63']
        self.PhyState = {
            name: deque(maxlen=self.W.TimeLeg) for name in self.PhyPara
        }
        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {
            name: deque(maxlen=self.W.TimeLeg) for name in self.COMPPara
        }

    def MakeStateSet(self):
        """Append the current CNS values to the windows and rebuild the tensors.

        Sets ``S_Py`` / ``S_Comp`` (reshaped to ``(1, n_para, window)``) and
        ``S_ONE_Py`` / ``S_ONE_Comp`` (latest value of each parameter).
        """
        for name in self.PhyPara:
            self.PhyState[name].append(
                self.PreProcessing(name, self.CNS.mem[name]['Val']))
        for name in self.COMPPara:
            self.COMPState[name].append(
                self.PreProcessing(name, self.CNS.mem[name]['Val']))

        # Convert to tensors.
        self.S_Py = torch.tensor(
            [list(self.PhyState[key]) for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0],
                                      self.S_Py.shape[1])
        self.S_Comp = torch.tensor(
            [list(self.COMPState[key]) for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0],
                                          self.S_Comp.shape[1])

        # Latest single value per parameter.
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        """Scale and round ``val`` for parameter ``para``; unknown names pass through."""
        if para == 'ZINST58':
            return round(val / 1000, 7)  # pressurizer pressure
        if para == 'ZINST63':
            return round(val / 100, 7)  # pressurizer level
        return val

    # ==========================================================================
    def run(self):
        """Process entry point: run training episodes forever.

        ``gamma`` is expected to be defined at module level.
        """
        while True:
            self.CNS.init_cns(initial_nub=1)
            time.sleep(1)

            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial.  ``done`` never becomes True in this version, so the
            # return below is always bootstrapped from the critic.
            done = False
            self.InitialStateSet()

            # Fill the time-lagged windows.
            for _ in range(self.W.TimeLeg):
                self.CNS.run_freeze_CNS()
                self.MakeStateSet()

            for _cycle in range(15):
                spy_lst, scomp_lst, r_lst = [], [], []

                # Sampling
                for _t in range(5):
                    PreVal = self.LocalNet.GetPredictActorOut(
                        x_py=self.S_Py, x_comp=self.S_Comp)
                    PreVal = PreVal.tolist()[0]  # (1, 2) -> (2,)

                    spy_lst.append(self.S_Py.tolist()[0])
                    scomp_lst.append(self.S_Comp.tolist()[0])

                    # Predicted next value = latest value + actor output.
                    old_before = {0: 0, 1: 0}
                    for nub_val in range(0, 2):
                        old_before[nub_val] = (self.S_ONE_Py[nub_val] +
                                               PreVal[nub_val])

                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                    # Reward a prediction within +/-0.0001 of the new value.
                    r = {0: 0, 1: 0}
                    for nub_val in range(0, 2):
                        low = self.S_ONE_Py[nub_val] - 0.0001
                        high = self.S_ONE_Py[nub_val] + 0.0001
                        if low < old_before[nub_val] < high:
                            r[nub_val] = 0.1
                        else:
                            r[nub_val] = -0.1

                    # Both predictions must be right for a positive reward.
                    t_r = 0.1 if r[0] == 0.1 and r[1] == 0.1 else -0.1
                    r_lst.append(t_r)

                    print(self.CurrentIter, PreVal,
                          self.S_ONE_Py[0] - 0.0001, old_before[0],
                          self.S_ONE_Py[0], self.S_ONE_Py[0] + 0.0001, '|',
                          self.S_ONE_Py[1] - 0.0001, old_before[1],
                          self.S_ONE_Py[1], self.S_ONE_Py[1] + 0.0001, '|',
                          r[0], r[1], t_r)

                # Train: discounted return built backwards from the last step.
                spy_fin = self.S_Py  # (1, 2, 10)
                scomp_fin = self.S_Comp  # (1, 2, 10)
                R = 0.0 if done else self.LocalNet.GetPredictCrticOut(
                    spy_fin, scomp_fin).item()
                td_target_lst = []
                for reward in r_lst[::-1]:
                    R = gamma * R + reward
                    td_target_lst.append([R])
                td_target_lst.reverse()

                # Build the batch.
                spy_batch = torch.tensor(spy_lst, dtype=torch.float)
                scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)
                td_target = torch.tensor(td_target_lst)

                value = self.LocalNet.GetPredictCrticOut(spy_batch, scomp_batch)
                advantage = td_target - value
                PreVal = self.LocalNet.GetPredictActorOut(x_py=spy_batch,
                                                          x_comp=scomp_batch)
                critic_loss = nn.functional.smooth_l1_loss(
                    self.LocalNet.GetPredictCrticOut(spy_batch, scomp_batch),
                    td_target.detach())
                loss = -torch.log(PreVal) * advantage.detach() + critic_loss

                self.optimizer.zero_grad()
                loss.mean().backward()
                # Hand the local gradients to the global parameters.
                for global_param, local_param in zip(
                        self.GlobalNet.parameters(),
                        self.LocalNet.parameters()):
                    global_param._grad = local_param.grad
                self.optimizer.step()
                self.LocalNet.load_state_dict(self.GlobalNet.state_dict())
            print('Done')
class Agent(mp.Process):
    """Worker process that replays listed malfunction scenarios on the CNS and logs data.

    For every iteration number taken from the shared counter, the matching
    entry of ``Work_info().mal_list`` is used to reset the CNS, which is then
    stepped ``TimeLeg + 1`` times.  The process ends when no scenario exists
    for the current iteration.
    """

    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port):
        """Create the CNS connection.

        Args:
            GlobalNet: accepted for interface compatibility; not used here.
            MEM: shared memory mapping; ``MEM['Iter']`` is the episode counter.
            CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded to ``CNS``.
        """
        mp.Process.__init__(self)
        # Work info
        self.W = Work_info()
        # CNS
        # NOTE(review): the first argument was missing in the source;
        # self.name (the Process name) is assumed -- confirm against CNS.
        self.CNS = CNS(self.name,
                       CNS_ip,
                       CNS_port,
                       Remote_ip,
                       Remote_port,
                       Max_len=self.W.TimeLeg)
        self.CNS.LoggerPath = 'DB'
        # SharedMem
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        print(f'Make -- {self}')

    # ==========================================================================
    # Control-signal part
    def send_action_append(self, pa, va):
        """Queue parameter names ``pa`` with their values ``va`` for sending."""
        for idx, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[idx])

    def send_action(self, act=0):
        """Reset the queued control signal and send it (currently always empty).

        ``act`` is accepted for interface compatibility and is not used.
        """
        # Lists holding the parameters and values to transmit.
        self.para = []
        self.val = []
        # Send the final parameters.
        self.CNS._send_control_signal(self.para, self.val)

    # ==========================================================================
    # Input / output construction
    def PreProcessing(self):
        """Hook for re-processing acquired values; intentionally a no-op here."""
        pass

    def CNSStep(self):
        """Advance the CNS by one step and store the resulting values."""
        self.CNS.run_freeze_CNS()  # run the CNS and update its memory
        self.PreProcessing()  # re-process the acquired values
        self.CNS._append_val_to_list()  # store the final ['Val'] in ['List']

    def run(self):
        """Process entry point: replay scenarios until the list is exhausted."""
        while True:
            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1

            # Scenario entries look like {'Case': 0, 'Opt': 0, 'Time': 0, ...}.
            try:
                scenario = self.W.mal_list[self.CurrentIter]
            except (KeyError, IndexError):
                # No scenario for this iteration: normal end of the worker.
                break

            try:
                size = scenario['Opt']
                maltime = scenario['Time']
                mal_case = scenario['Case']
                mal_case2 = scenario['Case2']
                mal_opt2 = scenario['Opt2']
                mal_time2 = scenario['Time2']
                file_name = (f'{mal_case}_{size}_{maltime}_'
                             f'{mal_case2}_{mal_opt2}_{mal_time2}')

                # CNS initial
                self.CNS.reset(initial_nub=1,
                               mal=True,
                               mal_case=mal_case,
                               mal_opt=size,
                               mal_time=maltime,
                               file_name=file_name)
                time.sleep(1)
                print(f'DONE initial {file_name}')

                # Only collect data for TimeLeg + 1 steps.
                for _ in range(self.W.TimeLeg + 1):
                    self.CNSStep()
                print('DONE EP')
            except Exception as exc:
                # The worker still stops on any failure, as before, but the
                # reason is reported instead of being silently discarded.
                print(f'{self} stopped: {exc!r}')
                break
        print('END')
class Agent(mp.Process):
    """Worker process that trains a set of actor-critic networks on the CNS.

    A local copy of every global network is used for sampling. After each
    rollout the local gradients are copied onto the global networks, the
    optimiser (built on the global networks) steps, and the local copies are
    re-synchronised. Every 100th iteration (except iteration 0) runs the
    unfinished prognostic test mode instead of training.
    """

    # Names of the figures/axes the prognostic mode draws into.
    _PROGNOSTIC_KEYS = ("ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145",
                        "BFV122_CONT", "BPV145_CONT")
    # Valve command (0 stay, 1 up, 2 down) -> values of the two switches.
    _SWITCH_VALUES = {0: [0, 0], 1: [0, 1], 2: [1, 0]}

    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip,
                 Remote_port):
        """Create the worker.

        Args:
            GlobalNet: Shared network box; its weights seed the local copy.
            MEM: Shared mapping; only the ``'Iter'`` counter is accessed.
            CNS_ip, CNS_port, Remote_ip, Remote_port: Forwarded to ``CNS``.
        """
        mp.Process.__init__(self)
        # Network info: copy the parent (global) weights into the local nets.
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for idx in range(self.LocalNet.NubNET):
            self.LocalNet.NET[idx].load_state_dict(
                self.GlobalNet.NET[idx].state_dict())
        # Optimisers operate on the *global* networks.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET,
                                  NET=self.GlobalNet.NET)
        # Work info
        self.W = Work_info()
        # RL rollout memory
        self.RLMem = RLMem(net_nub=self.LocalNet.NubNET)
        # CNS interface, labelled with this process' name.
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port,
                       Max_len=self.W.TimeLeg)
        self.CNS.LoggerPath = 'V6_1_EOP'
        # Shared memory and a private snapshot taken at construction time.
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Parameters in use.
        # name: {'Div': divisor, 'Round': digits, 'Type': where it is stored}
        # Type 'P' feeds S_Py, 'F' feeds S_Comp, 'C' is skipped by
        # PreProcessing.
        self.PARA_info = {
            'ZINST58': {'Div': 1000, 'Round': 5, 'Type': 'P'},
            'ZINST63': {'Div': 100, 'Round': 4, 'Type': 'P'},
            'ZVCT': {'Div': 100, 'Round': 4, 'Type': 'P'},
            'BFV122': {'Div': 1, 'Round': 2, 'Type': 'F'},
            'BPV145': {'Div': 1, 'Round': 2, 'Type': 'F'},
            'BPV122C': {'Div': 2, 'Round': 2, 'Type': 'C'},
            'BPV145C': {'Div': 2, 'Round': 2, 'Type': 'C'},
        }
        # Cross-check PARA_info against the 'v'-prefixed entries of CNS.mem
        # (only while the shared counter is still 0).
        if self.mem['Iter'] == 0:
            for name in self.PARA_info:
                if f'v{name}' not in self.CNS.mem:
                    print(f'v{name} 값이 없음 db_add.txt에 추가할 것')
            for key in self.CNS.mem:
                if key.startswith('v') and key[1:] not in self.PARA_info:
                    print(f'{key} 값이 없음 self.PARA_info에 추가할 것')
        print(f'Make -- {self}')

    # ======================================================================
    # Control-signal part
    def send_action_append(self, pa, va):
        """Queue the parameter names ``pa`` with their values ``va``."""
        for idx, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[idx])

    def send_action(self, act=0, BFV122=0, PV145=0):
        """Build and send the valve control signal.

        Args:
            act: Only ``0`` produces any switch commands.
            BFV122: 0 stay, 1 up, 2 down; other values queue nothing.
            PV145: 0 stay, 1 up, 2 down; other values queue nothing.
        """
        self.para = []
        self.val = []
        if act == 0:
            # BFV122 Man, PV145 Man
            self.send_action_append(["KSWO100", "KSWO89"], [1, 1])
            pv145_values = self._SWITCH_VALUES.get(PV145)
            if pv145_values is not None:
                self.send_action_append(["KSWO90", "KSWO91"], pv145_values)
            bfv122_values = self._SWITCH_VALUES.get(BFV122)
            if bfv122_values is not None:
                self.send_action_append(["KSWO101", "KSWO102"], bfv122_values)
        # Send the final parameter list.
        self.CNS._send_control_signal(self.para, self.val)

    # ======================================================================
    # Input / output value generation
    def PreProcessing(self):
        """Refresh the scaled 'v' values and rebuild the network input tensors."""
        # Re-process every non-control parameter into its 'v'-prefixed slot.
        for name, info in self.PARA_info.items():
            if info['Type'] != 'C':
                self.CNS.mem[f'v{name}']['Val'] = TOOL.RoundVal(
                    self.CNS.mem[name]['Val'], info['Div'], info['Round'])

        # NOTE(review): the tensors are built from the un-prefixed entries'
        # 'List', not from the scaled 'v' entries written above — confirm
        # this is intended.
        S_py_list, S_Comp_list = [], []
        for name, info in self.PARA_info.items():
            if info['Type'] == 'P':
                S_py_list.append(self.CNS.mem[name]['List'])
            if info['Type'] == 'F':
                S_Comp_list.append(self.CNS.mem[name]['List'])
        self.S_Py = torch.tensor(S_py_list)
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0],
                                      self.S_Py.shape[1])
        self.S_Comp = torch.tensor(S_Comp_list)
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0],
                                          self.S_Comp.shape[1])

    def CNSStep(self):
        """Advance the CNS one step and refresh the stored values and tensors."""
        self.CNS.run_freeze_CNS()        # pull the new CNS values into memory
        self.PreProcessing()             # re-process values, rebuild tensors
        self.CNS._append_val_to_list()   # push each final 'Val' onto its 'List'

    # ======================================================================
    # Pure helpers
    @staticmethod
    def _scale_action(nubNet, act):
        """Convert a sampled class index into the value network ``nubNet`` means.

        Networks 0, 6 and 7 use the index directly; 1-5 map it to a signed
        offset around index 100. Any other network yields 0.
        """
        if nubNet in (0, 6, 7):
            return act
        if nubNet == 1:
            return round((act - 100) / 100000, 5)
        if nubNet in (2, 3):
            return round((act - 100) / 10000, 4)
        if nubNet in (4, 5):
            return round((act - 100) / 100, 2)
        return 0

    @staticmethod
    def _reward(nubNet, old_cns, new_cns, action, before_malfunction):
        """Return the reward for network ``nubNet`` or ``None`` if it has none.

        Args:
            old_cns / new_cns: ``[ZINST58, ZINST63, ZVCT, BFV122, BPV145]``
                before and after the simulator step.
            action: The network's converted action.
            before_malfunction: True while the CNS time is below the
                malfunction time.
        """
        if nubNet == 0:
            # Action 1 means "malfunction"; it is right only after the event.
            return 1 if (action == 1) != before_malfunction else -1
        if nubNet in (1, 2, 3):
            # The action is a predicted change of the physical value.
            bound = {1: 0.00001, 2: 0.0001, 3: 0.0001}[nubNet]
            error = new_cns[nubNet - 1] - (old_cns[nubNet - 1] + action)
            return -1 if abs(error) > bound else 1
        if nubNet in (4, 5):
            # The action is a predicted absolute component value.
            error = new_cns[nubNet - 1] - action
            return -1 if abs(error) > 0.01 else 1
        if nubNet in (6, 7):
            # Penalise the distance of new_cns[1] from 0.55 (normal PZR level).
            error = new_cns[1] - 0.55
            return -abs(error) * 10 if abs(error) > 0.005 else 1
        return None

    @staticmethod
    def _gae(deltas, gamma, lmbda):
        """Accumulate TD errors (rows of one value) backwards into advantages."""
        advantages = []
        running = 0.0
        for row in deltas[::-1]:
            running = gamma * running * lmbda + row[0]
            advantages.append([running])
        advantages.reverse()
        return advantages

    def _latest_cns_values(self):
        """Return the newest column of S_Py / S_Comp as (phys, comp, rounded)."""
        phys = self.S_Py[:, :, -1:].data.reshape(3).tolist()
        comp = self.S_Comp[:, :, -1:].data.reshape(2).tolist()
        # "ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"
        rounded = [round(phys[0], 5), round(phys[1], 4), round(phys[2], 4),
                   round(comp[0], 2), round(comp[1], 2)]
        return phys, comp, rounded

    # ======================================================================
    def run(self):
        """Run episodes forever: reset the CNS, then train or test."""
        while True:
            # Claim an iteration number.
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            # Random malfunction size and start time.
            size, maltime = ran.randint(100, 600), ran.randint(30, 100) * 5
            # CNS initial
            self.CNS.reset(initial_nub=1, mal=True, mal_case=36, mal_opt=size,
                           mal_time=maltime, file_name=self.CurrentIter)
            print(f'DONE initial {size}, {maltime}')

            # Diagnosis-module tester: every 100th iteration is a test run.
            if self.CurrentIter != 0 and self.CurrentIter % 100 == 0:
                print(self.CurrentIter, 'Yes Test')
                self.PrognosticMode = True
            else:
                print(self.CurrentIter, 'No Test')
                self.PrognosticMode = False

            fulltime = 2
            t_max = 2  # total iteration = fulltime * t_max
            if self.PrognosticMode:
                self._run_prognostic_episode(fulltime, t_max)
            else:
                self._run_training_episode(fulltime, t_max, maltime)

    def _run_training_episode(self, fulltime, t_max, maltime):
        """Collect ``fulltime`` rollouts of ``t_max`` steps, training after each."""
        # Send the initial control setting.
        self.send_action()
        time.sleep(1)
        # Fill the history window before sampling.
        for _ in range(self.W.TimeLeg + 1):
            self.CNSStep()

        done = False
        ep_iter = 0
        for rollout in range(fulltime):
            self.RLMem.CleanTrainMem()
            for t in range(t_max):
                self._sample_actions()
                # State used for training.
                self.RLMem.SaveState(self.S_Py, self.S_Comp)
                # Old values, to compare against the values after the step.
                self.old_phys, self.old_comp, self.old_cns = \
                    self._latest_cns_values()

                # Send the action, then advance one step. CNSStep (rather
                # than a bare run_freeze_CNS) is required so that S_Py and
                # S_Comp are rebuilt; otherwise new == old.
                self.send_action(act=0, BFV122=self.RLMem.GetAct(6),
                                 PV145=self.RLMem.GetAct(7))
                self.CNSStep()
                self.new_phys, self.new_comp, self.new_cns = \
                    self._latest_cns_values()

                # Reward and termination flag for every network.
                before_malfunction = self.CNS.mem['KCNTOMS']['Val'] < maltime
                for nubNet in range(self.LocalNet.NubNET):
                    reward = self._reward(
                        nubNet, self.old_cns, self.new_cns,
                        self.RLMem.int_mod_action[nubNet], before_malfunction)
                    if reward is not None:
                        self.RLMem.SaveReward(nubNet, reward)
                    # NOTE(review): 14 is never reached while fulltime == 2,
                    # and the saved flag is later multiplied into the
                    # bootstrap value, so False zeroes it — confirm both.
                    if rollout == 14 and t == t_max - 1:
                        done = True
                    self.RLMem.SaveDone(nubNet, done)

                print(f"[{self.CurrentIter:3}]"
                      + f"TIME: {self.CNS.mem['KCNTOMS']['Val']:5}|")
                # Logger
                TOOL.log_add(file_name=f"{self.name}.txt",
                             ep=self.CurrentIter, ep_iter=ep_iter,
                             x=self.old_cns)
                ep_iter += 1

            self._train_networks(t_max)
        print('DONE EP')

    def _sample_actions(self):
        """Sample one action per network from its actor and store it."""
        for nubNet in range(self.LocalNet.NubNET):
            # Actor output for the current inputs, flattened to 1-D.
            probs = self.LocalNet.NET[nubNet].GetPredictActorOut(
                x_py=self.S_Py, x_comp=self.S_Comp).view(-1)
            # Sampled class index (int).
            act = torch.distributions.Categorical(probs).sample().item()
            # Store the index with its probability, then the converted value
            # (used mainly for the actual CNS control variables).
            self.RLMem.SaveNetOut(nubNet, probs.tolist()[act], act)
            self.RLMem.SaveModNetOut(nubNet, self._scale_action(nubNet, act))

    def _train_networks(self, t_max):
        """Run one clipped-surrogate update per network on the stored rollout."""
        gamma = 0.98
        lmbda = 0.95
        eps_clip = 0.1
        # States 1..n and the shifted states 2..n plus the latest state.
        spy_batch, scomp_batch = self.RLMem.GetBatch()
        spy_fin, scomp_fin = self.RLMem.GetFinBatch(self.S_Py, self.S_Comp)

        for nubNet in range(self.LocalNet.NubNET):
            net = self.LocalNet.NET[nubNet]
            rewards = torch.tensor(self.RLMem.list_reward_temp[nubNet],
                                   dtype=torch.float).view(t_max, 1)
            done_mask = torch.tensor(self.RLMem.list_done_temp[nubNet],
                                     dtype=torch.float).view(t_max, 1)
            td_target = rewards + gamma * net.GetPredictCrticOut(
                spy_fin, scomp_fin) * done_mask
            delta = td_target - net.GetPredictCrticOut(spy_batch, scomp_batch)
            adv = torch.tensor(
                self._gae(delta.detach().numpy(), gamma, lmbda),
                dtype=torch.float)

            # Probability of each stored action under the current actor.
            PreVal = net.GetPredictActorOut(spy_batch, scomp_batch)
            PreVal = PreVal.gather(
                1, torch.tensor(self.RLMem.list_action_temp[nubNet]))
            # Ratio a/b == exp(log(a) - log(b))
            old_prob = torch.tensor(
                self.RLMem.list_porb_action_temp[nubNet], dtype=torch.float)
            ratio = torch.exp(torch.log(PreVal) - torch.log(old_prob))

            surr1 = ratio * adv
            surr2 = torch.clamp(ratio, 1 - eps_clip, 1 + eps_clip) * adv
            min_val = torch.min(surr1, surr2)
            smooth_l1_loss = nn.functional.smooth_l1_loss(
                net.GetPredictCrticOut(spy_batch, scomp_batch),
                td_target.detach())
            loss = -min_val + smooth_l1_loss

            self.LocalOPT.NETOPT[nubNet].zero_grad()
            loss.mean().backward()
            # Hand the local gradients to the global network before stepping.
            for global_param, local_param in zip(
                    self.GlobalNet.NET[nubNet].parameters(),
                    net.parameters()):
                global_param._grad = local_param.grad
            self.LocalOPT.NETOPT[nubNet].step()
            net.load_state_dict(self.GlobalNet.NET[nubNet].state_dict())

    def _run_prognostic_episode(self, fulltime, t_max):
        """Unfinished test mode: roll the networks forward and plot the result.

        NOTE(review): this path calls ``self.MakeStateSet`` and ``self.Recode``
        and uses ``self.fig_dict`` / ``self.ax_dict``, none of which this class
        defines — it cannot run until they are provided.
        """
        keys = self._PROGNOSTIC_KEYS
        total_steps = fulltime * t_max
        record = {"Time": []}
        record.update({key: [] for key in keys})
        timer = 0

        for i in range(0, 2):
            if i == 0:
                # Send the initial control setting.
                self.send_action()
                time.sleep(1)

            # Fill the history window.
            for _ in range(self.W.TimeLeg):
                self.CNS.run_freeze_CNS()
                self.MakeStateSet()
                timer, record = self.Recode(record, timer, S_Py=self.S_Py,
                                            S_Comp=self.S_Comp)

            for t in range(total_steps):
                if t % 10 == 0:  # forecast at step 0 and every 10th step
                    self._forecast(t, total_steps, record, timer)

                a_now = {n: 0 for n in range(self.LocalNet.NubNET)}
                for nubNet in range(self.LocalNet.NubNET):
                    probs = self.LocalNet.NET[nubNet].GetPredictActorOut(
                        x_py=self.S_Py, x_comp=self.S_Comp).view(-1)
                    act = torch.distributions.Categorical(
                        probs).sample().item()
                    a_now[nubNet] = self._scale_action(nubNet, act)

                # Send the action, advance one step, record.
                self.send_action(act=0, BFV122=a_now[6], PV145=a_now[7])
                self.CNS.run_freeze_CNS()
                self.MakeStateSet(BFV122=a_now[6], PV145=a_now[7])
                timer, record = self.Recode(record, timer, S_Py=self.S_Py,
                                            S_Comp=self.S_Comp)

            for key in keys:
                self.ax_dict[key].grid()
            for key in keys:
                self.ax_dict[key].legend()
            suffix = 'M' if i == 0 else 'A'
            for key in keys:
                self.fig_dict[key].savefig(
                    f"{key}_{self.CurrentIter}_{suffix}.png")
        print('END TEST')

    def _forecast(self, t, total_steps, record, timer):
        """Predict from step ``t`` to the end on copies of the state and plot it."""
        copySPy = copy.deepcopy(self.S_Py)
        copySComp = copy.deepcopy(self.S_Comp)
        copyRecodBox = copy.deepcopy(record)
        temp_timer = copy.deepcopy(timer)

        # The forecast horizon shrinks as t grows.
        for _ in range(t, total_steps):
            predicted = {n: 0 for n in range(self.LocalNet.NubNET)}
            for nubNet in range(self.LocalNet.NubNET):
                probs = self.LocalNet.NET[nubNet].GetPredictActorOut(
                    x_py=copySPy, x_comp=copySComp).view(-1)
                # Greedy choice: index of the largest output.
                predicted[nubNet] = self._scale_action(
                    nubNet, probs.argmax().item())

            # Physical values: last column plus the predicted change,
            # appended on the right while the oldest column is dropped.
            last_py = copySPy[:, :, -1:] + torch.tensor(
                [[[predicted[1]], [predicted[2]], [predicted[3]]]])
            copySPy = torch.cat((copySPy, last_py), dim=2)[:, :, 1:]

            # Component values: the predicted values replace the last column.
            last_comp = torch.tensor([[[predicted[4]], [predicted[5]],
                                       [predicted[6] / 2],
                                       [predicted[7] / 2]]])
            copySComp = torch.cat((copySComp, last_comp), dim=2)[:, :, 1:]

            temp_timer, copyRecodBox = self.Recode(
                copyRecodBox, temp_timer, S_Py=copySPy, S_Comp=copySComp)

        # Draw and save the forecast.
        for key in self._PROGNOSTIC_KEYS:
            self.ax_dict[key].plot(copyRecodBox["Time"], copyRecodBox[key],
                                   label=f"{key}_{t}")
        for key in self._PROGNOSTIC_KEYS:
            self.fig_dict[key].savefig(f"{key}_{self.CurrentIter}_{t}.png")
class Agent(mp.Process):
    """Worker process that trains per-variable actor-critic networks on the CNS.

    Each network's actor output is treated as the predicted change of one
    physical value (pressuriser pressure / level). A prediction that lands
    within +/-0.0001 of the value observed after the next simulator step earns
    reward 1, otherwise 0. Gradients computed on the local networks are
    applied to the shared global networks.
    """

    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip,
                 Remote_port):
        """Create the worker.

        Args:
            GlobalNet: Shared network box; its weights seed the local copy.
            MEM: Shared mapping; only the ``'Iter'`` counter is accessed.
            CNS_ip, CNS_port, Remote_ip, Remote_port: Forwarded to ``CNS``.
        """
        mp.Process.__init__(self)
        # Network info: copy the global weights into the local networks.
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for idx in range(self.LocalNet.NubNET):
            self.LocalNet.NET[idx].load_state_dict(
                self.GlobalNet.NET[idx].state_dict())
        # Optimisers operate on the *global* networks.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET,
                                  NET=self.GlobalNet.NET)
        # CNS interface, labelled with this process' name.
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)
        # Shared memory and a private snapshot taken at construction time.
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)
        # Work info
        self.W = Work_info()
        print(f'Make -- {self}')

    # ======================================================================
    # Control-signal part
    def send_action_append(self, pa, va):
        """Queue the parameter names ``pa`` with their values ``va``."""
        for idx, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[idx])

    def send_action(self, act):
        """Reset the outgoing buffers and send them (always empty) to the CNS.

        ``act`` is accepted but not used by this version.
        """
        self.para = []
        self.val = []
        self.CNS._send_control_signal(self.para, self.val)

    # ======================================================================
    # Input / output value generation
    def InitialStateSet(self):
        """Create empty history windows of length ``W.TimeLeg``."""
        self.PhyPara = ['ZINST58', 'ZINST63']
        self.PhyState = {
            name: deque(maxlen=self.W.TimeLeg) for name in self.PhyPara
        }
        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {
            name: deque(maxlen=self.W.TimeLeg) for name in self.COMPPara
        }

    def MakeStateSet(self):
        """Append the current CNS values and rebuild the input tensors."""
        # Stack the newest (pre-processed) value of every parameter.
        for name in self.PhyPara:
            self.PhyState[name].append(
                self.PreProcessing(name, self.CNS.mem[name]['Val']))
        for name in self.COMPPara:
            self.COMPState[name].append(
                self.PreProcessing(name, self.CNS.mem[name]['Val']))

        # Convert to tensors of shape (1, n_parameters, window_length).
        self.S_Py = torch.tensor(
            [list(self.PhyState[name]) for name in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0],
                                      self.S_Py.shape[1])
        self.S_Comp = torch.tensor(
            [list(self.COMPState[name]) for name in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0],
                                          self.S_Comp.shape[1])

        # Newest single value of every parameter.
        self.S_ONE_Py = [self.PhyState[name][-1] for name in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[name][-1] for name in self.COMPPara]

    def PreProcessing(self, para, val):
        """Scale ``val`` for parameter ``para``; other parameters pass through."""
        if para == 'ZINST58':
            return round(val / 1000, 7)  # pressuriser pressure
        if para == 'ZINST63':
            return round(val / 100, 7)   # pressuriser level
        return val

    @staticmethod
    def _discounted_returns(rewards, bootstrap, gamma):
        """Return ``[[R_0], [R_1], ...]`` with ``R_t = r_t + gamma * R_{t+1}``.

        ``bootstrap`` is the value used after the last reward.
        """
        running = bootstrap
        returns = []
        for reward in rewards[::-1]:
            running = gamma * running + reward
            returns.append([running])
        returns.reverse()
        return returns

    # ======================================================================
    def run(self):
        """Run episodes forever: reset the CNS, sample rollouts and train."""
        while True:
            self.CNS.init_cns(initial_nub=1)
            print('DONE initial')
            time.sleep(1)

            # Claim an iteration number.
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # `done` is never set to True in this version, so every rollout
            # bootstraps from the critic.
            done = False
            self.InitialStateSet()

            # Fill the history window before sampling.
            for _ in range(self.W.TimeLeg):
                self.CNS.run_freeze_CNS()
                self.MakeStateSet()

            for _ in range(15):
                spy_lst, scomp_lst = [], []
                r_dict = {n: [] for n in range(self.LocalNet.NubNET)}

                # Sampling
                for _step in range(5):
                    self._sample_step(spy_lst, scomp_lst, r_dict)

                self._train_networks(spy_lst, scomp_lst, r_dict, done)
            print('DONE EP')

    def _sample_step(self, spy_lst, scomp_lst, r_dict):
        """Predict, advance the CNS one step and record state and rewards."""
        TimeDB = {'Netout': {}}
        for nubNet in range(self.LocalNet.NubNET):
            NetOut = self.LocalNet.NET[nubNet].GetPredictActorOut(
                x_py=self.S_Py, x_comp=self.S_Comp)
            # First element of the first row, as a plain float.
            TimeDB['Netout'][nubNet] = NetOut.tolist()[0][0]

        spy_lst.append(self.S_Py.tolist()[0])      # drop the leading dim
        scomp_lst.append(self.S_Comp.tolist()[0])  # drop the leading dim

        # Predicted next value = current value + predicted change.
        old_before = {0: 0, 1: 0}
        for nubNet in range(self.LocalNet.NubNET):
            old_before[nubNet] = (self.S_ONE_Py[nubNet]
                                  + TimeDB['Netout'][nubNet])

        self.CNS.run_freeze_CNS()
        self.MakeStateSet()

        # Reward 1 when the prediction is within +/-0.0001 of the new value.
        r = {0: 0, 1: 0}
        for nub_val in range(0, 2):
            low = self.S_ONE_Py[nub_val] - 0.0001
            high = self.S_ONE_Py[nub_val] + 0.0001
            r[nub_val] = 1 if low < old_before[nub_val] < high else 0
        # Combined indicator (printed only): positive when both hit.
        # The per-variable rewards are 0 or 1, so they are compared with 1.
        t_r = 0.1 if r[0] == 1 and r[1] == 1 else -0.1

        # Store the reward per network.
        for nubNet in range(self.LocalNet.NubNET):
            r_dict[nubNet].append(r[nubNet])

        print(self.CurrentIter, TimeDB['Netout'],
              self.S_ONE_Py[0] - 0.0001, old_before[0], self.S_ONE_Py[0],
              self.S_ONE_Py[0] + 0.0001, '|',
              self.S_ONE_Py[1] - 0.0001, old_before[1], self.S_ONE_Py[1],
              self.S_ONE_Py[1] + 0.0001, '|', r[0], r[1], t_r)

    def _train_networks(self, spy_lst, scomp_lst, r_dict, done):
        """Update every network from the rollout and re-sync the local copy."""
        gamma = 0.98
        spy_fin = self.S_Py        # state after the last sampled step
        scomp_fin = self.S_Comp
        spy_batch = torch.tensor(spy_lst, dtype=torch.float)
        scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)

        # Advantage per network.
        for nubNet in range(self.LocalNet.NubNET):
            net = self.LocalNet.NET[nubNet]
            bootstrap = 0.0 if done else net.GetPredictCrticOut(
                spy_fin, scomp_fin).item()
            td_target = torch.tensor(
                self._discounted_returns(r_dict[nubNet], bootstrap, gamma))
            value = net.GetPredictCrticOut(spy_batch, scomp_batch)
            advantage = td_target - value

            PreVal = net.GetPredictActorOut(spy_batch, scomp_batch)
            loss = -torch.log(PreVal) * advantage.detach() + \
                nn.functional.smooth_l1_loss(
                    net.GetPredictCrticOut(spy_batch, scomp_batch),
                    td_target.detach())

            self.LocalOPT.NETOPT[nubNet].zero_grad()
            loss.mean().backward()
            # Hand the local gradients to the global network before stepping.
            for global_param, local_param in zip(
                    self.GlobalNet.NET[nubNet].parameters(),
                    net.parameters()):
                global_param._grad = local_param.grad
            self.LocalOPT.NETOPT[nubNet].step()
            net.load_state_dict(self.GlobalNet.NET[nubNet].state_dict())

            print(self.CurrentIter, 'adv: ', advantage.mean().item(),
                  'loss: ', loss.mean().item())
class Agent(mp.Process): def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip, Remote_port): mp.Process.__init__(self) # Network info self.GlobalNet = GlobalNet self.LocalNet = NETBOX() for _ in range(0, self.LocalNet.NubNET): self.LocalNet.NET[_].load_state_dict( self.GlobalNet.NET[_].state_dict()) self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET, NET=self.GlobalNet.NET) # CNS self.CNS = CNS(, CNS_ip, CNS_port, Remote_ip, Remote_port) # SharedMem self.mem = MEM self.LocalMem = copy.deepcopy(self.mem) # Work info self.W = Work_info() # GP Setting self.fig_dict = { i_: plt.figure(figsize=(13, 13)) for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"] } self.ax_dict = { i_: self.fig_dict[i_].add_subplot() for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"] } print(f'Make -- {self}') # ============================================================================================================== # 제어 신호 보내는 파트 def send_action_append(self, pa, va): for _ in range(len(pa)): self.para.append(pa[_]) self.val.append(va[_]) def send_action(self, act): # 전송될 변수와 값 저장하는 리스트 self.para = [] self.val = [] # 최종 파라메터 전송 self.CNS._send_control_signal(self.para, self.val) # # ============================================================================================================== # 입력 출력 값 생성 def InitialStateSet(self): self.PhyPara = ['ZINST58', 'ZINST63', 'ZVCT'] self.PhyState = {_: deque(maxlen=self.W.TimeLeg) for _ in self.PhyPara} self.COMPPara = ['BFV122', 'BPV145'] self.COMPState = { _: deque(maxlen=self.W.TimeLeg) for _ in self.COMPPara } def MakeStateSet(self): # 값을 쌓음 (return Dict) [ self.PhyState[_].append( self.PreProcessing(_, self.CNS.mem[_]['Val'])) for _ in self.PhyPara ] [ self.COMPState[_].append( self.PreProcessing(_, self.CNS.mem[_]['Val'])) for _ in self.COMPPara ] # Tensor로 전환 self.S_Py = torch.tensor([self.PhyState[key] for key in self.PhyPara]) self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0], self.S_Py.shape[1]) 
self.S_Comp = torch.tensor( [self.COMPState[key] for key in self.COMPPara]) self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0], self.S_Comp.shape[1]) # Old 1개 리스트 self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara] self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara] def PreProcessing(self, para, val): if para == 'ZINST58': val = round(val / 1000, 6) # 가압기 압력 if para == 'ZINST63': val = round(val / 100, 6) # 가압기 수위 if para == 'ZVCT': val = round(val / 100, 5) # VCT 수위 return val # ============================================================================================================== def run(self): while True: size, maltime = ran.randint(100, 600), ran.randint(30, 100) * 5 self.CNS.reset(initial_nub=1, mal=True, mal_case=36, mal_opt=size, mal_time=maltime) print(f'DONE initial {size}, {maltime}') # Get iter self.CurrentIter = self.mem['Iter'] self.mem['Iter'] += 1 # 진단 모듈 Tester ! if self.CurrentIter != 0 and self.CurrentIter % 15 == 0: print(self.CurrentIter, 'Yes Test') self.PrognosticMode = True else: print(self.CurrentIter, 'No Test') self.PrognosticMode = False # Initial done = False self.InitialStateSet() # GP 이전 데이터 Clear [ self.ax_dict[i_].clear() for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"] ] while not done: fulltime = 15 t_max = 5 # total iteration = fulltime * t_max tun = [1000, 100, 100, 1, 1] ro = [1, 1, 1, 2, 2] ProgRecodBox = { "ZINST58": [], "ZINST63": [], "ZVCT": [], "BFV122": [], "BPV145": [] } # recode 초기화 if self.PrognosticMode: # Test Mode for t in range(self.W.TimeLeg): self.CNS.run_freeze_CNS() self.MakeStateSet() [ ProgRecodBox[i_].append( round(self.CNS.mem[i_]['Val'], r_) / t_) for i_, t_, r_ in zip(ProgRecodBox.keys(), tun, ro) ] for __ in range(fulltime * t_max): # total iteration if __ != 0 and __ % 10 == 0: # 10Step 마다 예지 # copy self.S_Py, self.S_Comp copySPy, copySComp = self.S_Py, self.S_Comp copyRecodBox = { "ZINST58": [], "ZINST63": [], "ZVCT": [], "BFV122": [], "BPV145": [] 
} # recode 초기화 # TOOL.ALLP(copyRecodBox["ZINST58"], "CopySPy") for PredictTime in range( __, fulltime * t_max): # 시간이 갈수록 예지하는 시간이 줄어듬. # 예지 시작 save_ragular_para = { _: 0 for _ in range(self.LocalNet.NubNET) } for nubNet in range(0, self.LocalNet.NubNET): NetOut = self.LocalNet.NET[ nubNet].GetPredictActorOut( x_py=copySPy, x_comp=copySComp) NetOut = NetOut.view(-1) # (1, 2) -> (2, ) act_ = NetOut.argmax().item( ) # 행열에서 최대값을 추출 후 값 반환 if nubNet < 4: save_ragular_para[nubNet] = ( act_ - 10) / 10 # act_ 값이 값의 증감으로 변경 else: save_ragular_para[nubNet] = ( act_ - 100) / 100 # act_ 값이 값의 증감으로 변경 # TOOL.ALLP(save_ragular_para, "PARA") # copySPy, copySComp에 값 추가 # copySpy copySPyLastVal = copySPy[:, :, -1:] # [1, 3, 10] -> [1, 3, 1] 마지막 변수 가져옴. copySPyLastVal = copySPyLastVal + tensor([ [[save_ragular_para[0] / 1000], [save_ragular_para[1] / 100], [save_ragular_para[2] / 100]] ]) # 마지막 변수에 예측된 값을 더해줌. copySPy =, copySPyLastVal), dim=2) # 본래 텐서에 값을 더함. copySPy = copySPy[:, :, 1:] # 맨뒤의 값을 자름. # copySComp copySCompLastVal = copySComp[:, :, -1:] # [1, 3, 10] -> [1, 3, 1] 마지막 변수 가져옴. # copySpy와 다르게 copy SComp는 이전의 제어 값을 그대로 사용함. # copySCompLastVal = copySCompLastVal + tensor([[ # [save_ragular_para[3]], [save_ragular_para[4]], # ]]) # 마지막 변수에 예측된 값을 더해줌. #TODO # 자기자신 자체 copySCompLastVal = tensor( [[[save_ragular_para[3]], [save_ragular_para[4]]]]) copySComp = (copySComp, copySCompLastVal), dim=2) # 본래 텐서에 값을 더함. copySComp = copySComp[:, :, 1:] # 맨뒤의 값을 자름. 
# 결과값 Recode copyRecodBox["ZINST58"].append( copySPyLastVal[0, 0, 0].item()) copyRecodBox["ZINST63"].append( copySPyLastVal[0, 1, 0].item()) copyRecodBox["ZVCT"].append( copySPyLastVal[0, 2, 0].item()) copyRecodBox["BFV122"].append( copySComp[0, 0, 0].item()) copyRecodBox["BPV145"].append( copySComp[0, 1, 0].item()) # 예지 종료 결과값 Recode 그래픽화 [ self.ax_dict[i_].plot( ProgRecodBox[i_] + copyRecodBox[i_], label=f"{i_}_{__}") for i_ in [ "ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145" ] ] # # CNS + 1 Step self.CNS.run_freeze_CNS() self.MakeStateSet() [ ProgRecodBox[i_].append( round(self.CNS.mem[i_]['Val'], r_) / t_) for i_, t_, r_ in zip(ProgRecodBox.keys(), tun, ro) ] # END Test Mode CODE [ self.ax_dict[i_].grid() for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"] ] [ self.ax_dict[i_].legend() for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"] ] [ self.fig_dict[i_].savefig( f"{self.CurrentIter}_{i_}.png") for i_ in ["ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145"] ] print('END TEST') else: # Train Mode for t in range(self.W.TimeLeg): self.CNS.run_freeze_CNS() self.MakeStateSet() for __ in range(fulltime): spy_lst, scomp_lst, a_lst, r_lst = [], [], [], [] a_dict = {_: [] for _ in range(self.LocalNet.NubNET)} mu_dict = {_: [] for _ in range(self.LocalNet.NubNET)} a_now = {_: 0 for _ in range(self.LocalNet.NubNET)} a_prob = {_: [] for _ in range(self.LocalNet.NubNET)} r_dict = {_: [] for _ in range(self.LocalNet.NubNET)} done_dict = { _: [] for _ in range(self.LocalNet.NubNET) } # trag_mu = {_: [] for _ in range(self.LocalNet.NubNET)} # Sampling for t in range(t_max): NetOut_dict = { _: 0 for _ in range(self.LocalNet.NubNET) } for nubNet in [0, 2]: TOOL.ALLP(self.S_Py, 'S_Py') TOOL.ALLP(self.S_Comp, 'S_Comp') # TODO # Network는 0, 2은 actor net mu_v = self.LocalNet.NET[ nubNet].GetPredictActorOut( x_py=self.S_Py, x_comp=self.S_Comp) mu = # detach 이후 numpy로 반환 TOOL.ALLP(mu, "Mu") # Action 선택 logstd = self.LocalNet.NET[ nubNet] act = mu + np.exp(logstd) 
* np.random.normal( size=logstd.shape) act = np.clip(act, 0, 1) TOOL.ALLP(act, "ACT") # (1, 3) 또는 (1, 2) # 액션 및 mu 저장 a_dict[nubNet].append(act) mu_dict[nubNet].append(mu) NetOut_dict[nubNet] = act[ 0] # 현재 상태의 action DIS (3,) 또는 (2,) # 상태 저장 spy_lst.append(self.S_Py.tolist() [0]) # (1, 2, 10) -list> (2, 10) scomp_lst.append(self.S_Comp.tolist() [0]) # (1, 2, 10) -list> (2, 10) # old val to compare the new val ComparedPara = [ "ZINST58", "ZINST63", "ZVCT", "BFV122", "BPV145" ] ComparedParaRound = [1, 1, 1, 2, 2] self.old_cns = { para: round(self.CNS.mem[para]['Val'], pr) for para, pr in zip(ComparedPara, ComparedParaRound) } # CNS + 1 Step self.CNS.run_freeze_CNS() self.MakeStateSet() self.new_cns = { para: round(self.CNS.mem[para]['Val'], pr) for para, pr in zip(ComparedPara, ComparedParaRound) } # 보상 및 종료조건 계산 r = {0: 0, 1: 0, 2: 0, 3: 0} pa = {0: 0, 1: 0, 2: 0, 3: 0} for nubNet in range( 0, self.LocalNet.NubNET): # 보상 네트워크별로 계산 및 저장 if nubNet == 0 or nubNet == 1: # TODO # 여기서 부터 작업해야함. 
r[nubNet] = 1 elif nubNet == 2 or nubNet == 3: pass r_dict[nubNet].append(r[nubNet]) # 종료 조건 계산 if __ == 14 and t == t_max - 1: done_dict[nubNet].append(0) done = True else: done_dict[nubNet].append(1) def dp_want_val(val, name): return f"{name}: {self.CNS.mem[val]['Val']:4.4f}" print( self.CurrentIter, f"{r[0]:4}|{r[1]:4}|{r[2]:4}|{r[3]:4}|{r[4]:6}|{r[5]:6}|", f'{NetOut_dict[0]:0.4f}', f'{NetOut_dict[1]:0.4f}', f'{NetOut_dict[2]:0.4f}', f'{NetOut_dict[3]:0.4f}', f'{NetOut_dict[4]:0.4f}', f'{NetOut_dict[5]:0.4f}', f"TIME: {self.CNS.mem['KCNTOMS']['Val']:5}", # dp_want_val('PVCT', 'VCT pressure'), f"VCT Level: {self.new_cns['ZVCT']}", f"{self.old_cns['ZVCT'] + pa[1]:5.2f} + {pa[1]:5.2f}", f"PZR pre: {self.new_cns['ZINST58']}", f"{self.old_cns['ZINST58'] + pa[2]:5.2f} + {pa[2]:5.2f}", f"PZR Level: {self.new_cns['ZINST63']}", f"{self.old_cns['ZINST63'] + pa[3]:5.2f} + {pa[3]:5.2f}", f"BFV122: {self.new_cns['BFV122']}", f"{self.new_cns['BFV122'] + pa[4]:5.2f} + {pa[4]:5.2f}", f"BFV122: {self.new_cns['BPV145']}", f"{self.new_cns['BPV145'] + pa[5]:5.2f} + {pa[5]:5.2f}", # dp_want_val('UPRT', 'PRT temp'), dp_want_val('ZINST48', 'PRT pressure'), # dp_want_val('ZINST36', 'Let-down flow'), dp_want_val('BFV122', 'Charging Valve pos'), # dp_want_val('BPV145', 'Let-down Valve pos'), ) # ================================================================================================== # Train gamma = 0.98 lmbda = 0.95 # 1 .. 10 spy_batch = torch.tensor(spy_lst, dtype=torch.float) scomp_batch = torch.tensor(scomp_lst, dtype=torch.float) # 2 .. 
10 + (1 Last value) spy_lst.append(self.S_Py.tolist()[0]) scomp_lst.append(self.S_Comp.tolist()[0]) spy_fin = torch.tensor(spy_lst[1:], dtype=torch.float) scomp_fin = torch.tensor(scomp_lst[1:], dtype=torch.float) # 각 네트워크 별 Advantage 계산 for nubNet in range(0, self.LocalNet.NubNET): # GAE # r_dict[nubNet]: (5,) -> (5,1) # Netout : (5,1) # done_dict[nubNet]: (5,) -> (5,1) td_target = torch.tensor(r_dict[nubNet], dtype=torch.float).view(t_max, 1) + \ gamma * self.LocalNet.NET[nubNet].GetPredictCrticOut(spy_fin, scomp_fin) * \ torch.tensor(done_dict[nubNet], dtype=torch.float).view(t_max, 1) delta = td_target - self.LocalNet.NET[ nubNet].GetPredictCrticOut( spy_batch, scomp_batch) delta = delta.detach().numpy() adv_list = [] adv_ = 0.0 for reward in delta[::-1]: adv_ = gamma * adv_ * lmbda + reward[0] adv_list.append([adv_]) adv_list.reverse() adv = torch.tensor(adv_list, dtype=torch.float) PreVal = self.LocalNet.NET[ nubNet].GetPredictActorOut( spy_batch, scomp_batch) PreVal = PreVal.gather( 1, torch.tensor(a_dict[nubNet])) # PreVal_a # TOOL.ALLP(PreVal, f"Preval {nubNet}") # Ratio 계산 a/b == exp(log(a) - log(b)) # TOOL.ALLP(a_prob[nubNet], f"a_prob {nubNet}") Preval_old_a_prob = torch.tensor(a_prob[nubNet], dtype=torch.float) ratio = torch.exp( torch.log(PreVal) - torch.log(Preval_old_a_prob)) # TOOL.ALLP(ratio, f"ratio {nubNet}") # surr1, 2 eps_clip = 0.1 surr1 = ratio * adv surr2 = torch.clamp(ratio, 1 - eps_clip, 1 + eps_clip) * adv min_val = torch.min(surr1, surr2) smooth_l1_loss = nn.functional.smooth_l1_loss( self.LocalNet.NET[nubNet].GetPredictCrticOut( spy_batch, scomp_batch), td_target.detach()) loss = -min_val + smooth_l1_loss self.LocalOPT.NETOPT[nubNet].zero_grad() loss.mean().backward() for global_param, local_param in zip( self.GlobalNet.NET[nubNet].parameters(), self.LocalNet.NET[nubNet].parameters()): global_param._grad = local_param.grad self.LocalOPT.NETOPT[nubNet].step() self.LocalNet.NET[nubNet].load_state_dict( 
self.GlobalNet.NET[nubNet].state_dict()) # TOOL.ALLP(advantage.mean()) # print(self.CurrentIter, 'AgentNub: ', nubNet, # 'adv: ', adv.mean().item(), 'loss: ', loss.mean().item(), # '= - min_val(', min_val.mean().item(), ') + Smooth(', smooth_l1_loss.mean().item(), ')') print('DONE EP') break
class Agent(mp.Process):
    """Worker process that samples from the CNS simulator and trains networks.

    Each agent keeps a local copy of the networks (``LocalNet``), collects
    short rollouts from its own CNS connection, computes a clipped-surrogate
    loss on the local copy, and pushes the resulting gradients onto the
    shared ``GlobalNet`` before re-synchronising the local weights.
    """

    # Divisor applied to a raw CNS value in PreProcessing before rounding.
    _PRE_SCALE = {
        'ZINST58': 1000,  # pressurizer pressure
        'ZINST63': 100,   # pressurizer level
        'ZVCT': 100,      # VCT level
    }
    _PRE_DIGITS = 7

    # Rollout / optimisation constants (previously re-assigned inside loops).
    _N_SEGMENTS = 15   # training segments per episode
    _T_MAX = 5         # sampled steps per training segment
    _GAMMA = 0.98
    _LMBDA = 0.95
    _EPS_CLIP = 0.1

    def __init__(self, GlobalNet, MEM, CNS_ip, CNS_port, Remote_ip,
                 Remote_port):
        """Create the local networks, the CNS connection and bookkeeping.

        Args:
            GlobalNet: shared network container; its ``NET`` entries provide
                the initial weights and receive the gradients.
            MEM: shared memory mapping; ``MEM['Iter']`` is read and
                incremented in ``run``.
            CNS_ip, CNS_port, Remote_ip, Remote_port: forwarded unchanged to
                the ``CNS`` constructor.
        """
        super().__init__()

        # Network info: start the local copy from the global weights.
        self.GlobalNet = GlobalNet
        self.LocalNet = NETBOX()
        for idx in range(self.LocalNet.NubNET):
            self.LocalNet.NET[idx].load_state_dict(
                self.GlobalNet.NET[idx].state_dict())
        # The optimisers are built on the *global* networks.
        self.LocalOPT = NETOPTBOX(NubNET=self.LocalNet.NubNET,
                                  NET=self.GlobalNet.NET)

        # CNS
        # NOTE(review): the original call was `CNS(, CNS_ip, ...)`, which is a
        # syntax error (first argument missing). The process name is passed
        # here as the identifier -- confirm against the CNS constructor.
        self.CNS = CNS(self.name, CNS_ip, CNS_port, Remote_ip, Remote_port)

        # Shared memory and a private snapshot of it.
        self.mem = MEM
        self.LocalMem = copy.deepcopy(self.mem)

        # Work info
        self.W = Work_info()
        print(f'Make -- {self}')

    # ========================================================================
    # Control-signal sending part
    def send_action_append(self, pa, va):
        """Append parameter names ``pa`` and matching values ``va``.

        The pairs are added to ``self.para`` / ``self.val``. An ``IndexError``
        is raised if ``va`` is shorter than ``pa``.
        """
        for idx, name in enumerate(pa):
            self.para.append(name)
            self.val.append(va[idx])

    def send_action(self, act):
        """Reset the outgoing lists and send them to the CNS.

        ``act`` is currently unused: the lists are sent empty.
        """
        # Lists holding the variables and values to be transmitted.
        self.para = []
        self.val = []

        # Send the final parameters.
        self.CNS._send_control_signal(self.para, self.val)

    # ========================================================================
    # Input / output value generation
    def InitialStateSet(self):
        """Create empty fixed-length history buffers for every state variable.

        Each buffer is a ``deque`` bounded by ``self.W.TimeLeg``.
        """
        self.PhyPara = ['ZINST58', 'ZINST63', 'ZVCT']
        self.PhyState = {
            key: deque(maxlen=self.W.TimeLeg) for key in self.PhyPara
        }

        self.COMPPara = ['BFV122', 'BPV145']
        self.COMPState = {
            key: deque(maxlen=self.W.TimeLeg) for key in self.COMPPara
        }

    def MakeStateSet(self):
        """Push the current CNS values into the buffers and rebuild tensors.

        Sets ``S_Py`` / ``S_Comp`` (tensors with a leading axis of size 1) and
        ``S_ONE_Py`` / ``S_ONE_Comp`` (lists of the most recent values).
        """
        # Accumulate the newest (pre-processed) value of every parameter.
        for key in self.PhyPara:
            self.PhyState[key].append(
                self.PreProcessing(key, self.CNS.mem[key]['Val']))
        for key in self.COMPPara:
            self.COMPState[key].append(
                self.PreProcessing(key, self.CNS.mem[key]['Val']))

        # Convert to tensors shaped (1, n_parameters, history_length).
        self.S_Py = torch.tensor(
            [list(self.PhyState[key]) for key in self.PhyPara])
        self.S_Py = self.S_Py.reshape(1, self.S_Py.shape[0],
                                      self.S_Py.shape[1])
        self.S_Comp = torch.tensor(
            [list(self.COMPState[key]) for key in self.COMPPara])
        self.S_Comp = self.S_Comp.reshape(1, self.S_Comp.shape[0],
                                          self.S_Comp.shape[1])

        # Most recent single value of every parameter.
        self.S_ONE_Py = [self.PhyState[key][-1] for key in self.PhyPara]
        self.S_ONE_Comp = [self.COMPState[key][-1] for key in self.COMPPara]

    def PreProcessing(self, para, val):
        """Scale and round ``val`` for parameter ``para``.

        Parameters listed in ``_PRE_SCALE`` are divided by their divisor and
        rounded to ``_PRE_DIGITS`` decimals; any other parameter is returned
        unchanged.
        """
        divisor = self._PRE_SCALE.get(para)
        if divisor is None:
            return val
        return round(val / divisor, self._PRE_DIGITS)

    # ========================================================================
    @staticmethod
    def _gae(delta, gamma, lmbda):
        """Accumulate TD errors backwards into a (T, 1) advantage tensor."""
        adv_list = []
        running = 0.0
        for step_delta in delta[::-1]:
            running = gamma * running * lmbda + step_delta[0]
            adv_list.append([running])
        adv_list.reverse()
        return torch.tensor(adv_list, dtype=torch.float)

    def _train_networks(self, spy_lst, scomp_lst, a_dict, a_prob, r_dict,
                        done_dict, t_max):
        """Run one clipped-surrogate update per network on a sampled segment.

        ``spy_lst`` / ``scomp_lst`` hold the ``t_max`` sampled states; the
        current state is appended here so the shifted "next state" batch can
        be built from the same lists.
        """
        gamma, lmbda, eps_clip = self._GAMMA, self._LMBDA, self._EPS_CLIP

        # States 1 .. T
        spy_batch = torch.tensor(spy_lst, dtype=torch.float)
        scomp_batch = torch.tensor(scomp_lst, dtype=torch.float)

        # States 2 .. T + the latest state
        spy_lst.append(self.S_Py.tolist()[0])
        scomp_lst.append(self.S_Comp.tolist()[0])
        spy_fin = torch.tensor(spy_lst[1:], dtype=torch.float)
        scomp_fin = torch.tensor(scomp_lst[1:], dtype=torch.float)

        # Advantage and update for each network.
        for nubNet in range(self.LocalNet.NubNET):
            local_net = self.LocalNet.NET[nubNet]
            global_net = self.GlobalNet.NET[nubNet]

            # GAE: rewards and done masks are reshaped (T,) -> (T, 1).
            rewards = torch.tensor(r_dict[nubNet],
                                   dtype=torch.float).view(t_max, 1)
            mask = torch.tensor(done_dict[nubNet],
                                dtype=torch.float).view(t_max, 1)
            td_target = rewards + gamma * local_net.GetPredictCrticOut(
                spy_fin, scomp_fin) * mask
            delta = td_target - local_net.GetPredictCrticOut(
                spy_batch, scomp_batch)
            adv = self._gae(delta.detach().numpy(), gamma, lmbda)

            PreVal = local_net.GetPredictActorOut(spy_batch, scomp_batch)
            if nubNet == 0:
                # Keep only the output of the action that was taken.
                PreVal = PreVal.gather(1, torch.tensor(a_dict[nubNet]))

            # Ratio: a / b == exp(log(a) - log(b))
            Preval_old_a_prob = torch.tensor(a_prob[nubNet],
                                             dtype=torch.float)
            ratio = torch.exp(
                torch.log(PreVal) - torch.log(Preval_old_a_prob))

            # Clipped surrogate objective plus critic loss.
            surr1 = ratio * adv
            surr2 = torch.clamp(ratio, 1 - eps_clip, 1 + eps_clip) * adv
            min_val = torch.min(surr1, surr2)
            smooth_l1_loss = nn.functional.smooth_l1_loss(
                local_net.GetPredictCrticOut(spy_batch, scomp_batch),
                td_target.detach())
            loss = -min_val + smooth_l1_loss

            self.LocalOPT.NETOPT[nubNet].zero_grad()
            loss.mean().backward()
            # Hand the local gradients to the global network, step the
            # optimiser, then pull the updated global weights back.
            for global_param, local_param in zip(global_net.parameters(),
                                                 local_net.parameters()):
                global_param._grad = local_param.grad
            self.LocalOPT.NETOPT[nubNet].step()
            local_net.load_state_dict(global_net.state_dict())

    def run(self):
        """Process entry point: loop over episodes forever.

        Every episode resets the CNS with malfunction case 36 (random size
        and time), fills the state history, then alternates ``_T_MAX`` sampled
        steps with one training update for ``_N_SEGMENTS`` segments.
        """
        n_net = self.LocalNet.NubNET
        n_segments = self._N_SEGMENTS
        t_max = self._T_MAX
        ComparedPara = ["ZVCT"]

        def dp_want_val(val, name):
            return f"{name}: {self.CNS.mem[val]['Val']:3.4f}"

        while True:
            size, maltime = ran.randint(100, 600), ran.randint(30, 100) * 5
            self.CNS.reset(initial_nub=1, mal=True, mal_case=36,
                           mal_opt=size, mal_time=maltime)
            print(f'DONE initial {size}, {maltime}')

            # Get iter
            self.CurrentIter = self.mem['Iter']
            self.mem['Iter'] += 1
            print(self.CurrentIter)

            # Initial
            done = False
            self.InitialStateSet()

            while not done:
                # Fill the history buffers before sampling starts.
                for _ in range(self.W.TimeLeg):
                    self.CNS.run_freeze_CNS()
                    self.MakeStateSet()

                for __ in range(n_segments):
                    spy_lst, scomp_lst = [], []
                    a_dict = {n: [] for n in range(n_net)}
                    a_now = {n: 0 for n in range(n_net)}
                    a_prob = {n: [] for n in range(n_net)}
                    r_dict = {n: [] for n in range(n_net)}
                    done_dict = {n: [] for n in range(n_net)}

                    # Sampling
                    for t in range(t_max):
                        NetOut_dict = {n: 0 for n in range(n_net)}
                        for nubNet in range(n_net):
                            NetOut = self.LocalNet.NET[
                                nubNet].GetPredictActorOut(
                                    x_py=self.S_Py, x_comp=self.S_Comp)
                            # Flatten; original note: (1, 2) -> (2,)
                            NetOut = NetOut.view(-1)
                            if nubNet == 0:
                                # Sample one of the outputs; act is an int.
                                act = torch.distributions.Categorical(
                                    NetOut).sample().item()
                                NetOut = NetOut.tolist()[act]
                            else:
                                act = 0
                                NetOut = NetOut[0].item()

                            NetOut_dict[nubNet] = NetOut
                            a_now[nubNet] = act
                            a_dict[nubNet].append([act])
                            a_prob[nubNet].append([NetOut])

                        # Store the state; original note:
                        # (1, 2, 10) -list> (2, 10)
                        spy_lst.append(self.S_Py.tolist()[0])
                        scomp_lst.append(self.S_Comp.tolist()[0])

                        # Old values, to compare against the new values.
                        self.old_cns = {
                            para: round(self.CNS.mem[para]['Val'], 2)
                            for para in ComparedPara
                        }

                        # CNS + 1 step
                        self.CNS.run_freeze_CNS()
                        self.MakeStateSet()

                        self.new_cns = {
                            para: round(self.CNS.mem[para]['Val'], 2)
                            for para in ComparedPara
                        }

                        # Reward and termination condition, per network.
                        r = {0: 0, 1: 0}
                        is_last_step = (__ == n_segments - 1
                                        and t == t_max - 1)
                        for nubNet in range(n_net):
                            if nubNet == 0:
                                before_mal = (
                                    self.CNS.mem['KCNTOMS']['Val'] < maltime)
                                said_mal = a_now[nubNet] == 1  # Malfunction
                                # +1 when the answer matches the plant state.
                                r[nubNet] = 1 if said_mal != before_mal else -1
                            else:
                                # NOTE(review): exact float equality between
                                # a rounded value and an unrounded sum will
                                # rarely hold -- confirm this is intended.
                                predicted = (self.old_cns["ZVCT"]
                                             + NetOut_dict[1])
                                if predicted == self.new_cns["ZVCT"]:
                                    r[nubNet] = 1
                                else:
                                    r[nubNet] = -1
                            r_dict[nubNet].append(r[nubNet])

                            # Termination: mask is 0 on the final step.
                            if is_last_step:
                                done_dict[nubNet].append(0)
                                done = True
                            else:
                                done_dict[nubNet].append(1)

                        print(
                            self.CurrentIter,
                            f"{r[0]:3}|{r[1]:3}|",
                            f'{NetOut_dict[0]:0.4f}',
                            f"TIME: {self.CNS.mem['KCNTOMS']['Val']:5}",
                            dp_want_val('PVCT', 'VCT pressure'),
                            f"VCT Level: {self.new_cns['ZVCT']} "
                            f"{self.old_cns['ZVCT'] + NetOut_dict[1]:3.4f} + {NetOut_dict[1]:3.4f}",
                            dp_want_val('UPRT', 'PRT temp'),
                            dp_want_val('ZINST48', 'PRT pressure'),
                            dp_want_val('BFV122', 'Charging Valve pos'),
                        )

                    # ========================================================
                    # Train
                    self._train_networks(spy_lst, scomp_lst, a_dict, a_prob,
                                         r_dict, done_dict, t_max)

                print('DONE EP')
                # NOTE(review): this unconditional break means the
                # `while not done` body executes exactly once per episode.
                break