def GA_get_flooding_step(action_seq, startfile, floodStepFile, date_time, pumps):
    # Replay the GA's best pump schedule step by step and record the
    # cumulative flooding volume after each step. The first simulation
    # covers the first two actions together, so flooding_step ends up one
    # entry shorter than action_seq.
    action_step = []
    flooding_step = []
    for i in range(action_seq.shape[0]):
        if i == 0:
            action_step.append(action_seq[0])
            action_step.append(action_seq[1])
            # Reset the working input file from the start file.
            change_rain.copy_result(floodStepFile + '.inp', startfile + '.inp')
            set_pump.set_pump(action_seq, date_time[0:2], pumps,
                              floodStepFile + '.inp')
            simulation(floodStepFile + '.inp')
            _, flooding, _, _, _, _ = get_rpt.get_rpt(floodStepFile + '.rpt')
            flooding_step.append(flooding)
        if i > 1:
            action_step.append(action_seq[i])
            change_rain.copy_result(floodStepFile + '.inp', startfile + '.inp')
            set_pump.set_pump(action_seq, date_time[0:i + 1], pumps,
                              floodStepFile + '.inp')
            simulation(floodStepFile + '.inp')
            _, flooding, _, _, _, _ = get_rpt.get_rpt(floodStepFile + '.rpt')
            flooding_step.append(flooding)
    return flooding_step
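# change_rain.copy_result is called throughout with the destination first:
# it resets a working .inp from a template and also archives finished
# .inp/.rpt pairs. The module is not part of this excerpt; a minimal sketch
# of that assumed destination-first behavior:
import shutil

def copy_result(dst_path, src_path):
    """Overwrite dst_path with the contents of src_path (assumed semantics)."""
    shutil.copyfile(src_path, dst_path)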
# Fragment of an earlier, module-level version of the per-step loop in
# train(); the surrounding function, the time-step loop over i, and the
# head of the per-pool loop are not part of this excerpt.
            for item in pump_list[pool]:
                pumps.append(item)
            xs.append(x)
            y = 1 - a
            ys.append(y)
        # -- end of the per-pool loop --
        # The reward only exists after the pumps are set and the model is re-run.
        action_seq.append(action)
        etime = date_time[i + 1]
        copy_result(aiinfile_tem + '.inp', aiinfile + '.inp')
        set_datetime.set_date(sdate, edate, stime, etime, aiinfile_tem + '.inp')
        set_pump.set_pump(action_seq, date_time[1:i + 1], pumps,
                          aiinfile_tem + '.inp')
        simulation(aiinfile_tem + '.inp')
        # Read the report to produce the next action.
        total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
            aiinfile_tem + '.rpt')
        # Before deciding the pump switches, read the forebay depth at the
        # current (final) time; pumps stay off when the level is too low.
        pool_d = get_output.depth(aiinfile_tem + '.out', pool_list, date_t[i] - i)
        rain_sum = sum(rain[date_t[i]:date_t[i + 1]]) / max(rain)
        for pool in pool_list:
            reward = 0
            if flooding / total_in >= 0.5:
                pass  # penalty branch truncated in the source
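# The depth/probability threshold logic is repeated verbatim in test(),
# train(), and both step() variants below. Factored out, it amounts to this
# sketch (the helper name pump_action is hypothetical):
def pump_action(depth, limits, prob, action_space):
    """Map forebay depth and the policy's pump probability to a two-pump
    on/off pair. limits[0] is the minimum start level, limits[2] the
    maximum level; action_space[1] offsets the 0.1/0.6 thresholds."""
    if limits[0] < depth < limits[2]:
        # normal band: the learned probability chooses 0, 1, or 2 pumps
        if prob < action_space[1] + 0.1:
            return [0, 0]
        if prob < action_space[1] + 0.6:
            return [0, 1]
        return [1, 1]
    if depth < limits[0]:
        return [0, 0]  # below the minimum start level: keep pumps off
    return [1, 1]      # at/above the maximum level: run both pumps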
def test(self):
    # Run repeated simulations over the test rainfall set.
    saver = tf.train.Saver()
    saver.restore(self.sess, "./save/model.ckpt")
    etime = self.date_time[1]
    xs, ys, drs = [], [], []
    pumps = []
    for pool in self.pool_list:  # each pool is a forebay
        for item in self.pump_list[pool]:
            pumps.append(item)
    for iten in range(len(self.testRainData)):
        reward_sum = 0
        # Copy the pristine input file arg-original.inp over both the start
        # file and the working file; each reset rewrites the rainfall data.
        change_rain.copy_result(self.startfile + '.inp', 'arg-original.inp')
        change_rain.copy_result(self.infile + '.inp', 'arg-original.inp')
        rainData = self.testRainData[iten]
        change_rain.change_rain(rainData, self.startfile + '.inp')
        self.simulation(self.infile + '.inp')
        change_rain.copy_result(self.GA_tem + '.inp', self.startfile + '.inp')
        # GA comparison: generate the GA pump schedule for this storm and
        # its per-step flooding series, which serves as the per-step
        # baseline for the DDQN reward.
        self.GA_action_seq = GA_sim.GA_sim(self.GA_tem, self.GAfile,
                                           self.crossRate, self.mutationRate,
                                           self.lifeCount, self.date_time,
                                           pumps, self.GAStepNum)
        self.GA_flooding_step = GA_get_flooding_step.GA_get_flooding_step(
            self.GA_action_seq, self.GA_tem, self.GAStepfile,
            self.date_time, pumps)
        begin = datetime.datetime.now()
        change_rain.copy_result(
            './sim/test/result/en/inp/' + str(iten) + '.inp',
            self.startfile + '.inp')
        change_rain.copy_result(
            './sim/test/result/en/rpt/' + str(iten) + '.rpt',
            self.startfile + '.rpt')
        set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                              self.startfile + '.inp')
        change_rain.copy_result(self.infile + '.inp', self.startfile + '.inp')
        self.simulation(self.infile + '.inp')
        # Read the report to produce the first action.
        total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
            self.infile + '.rpt')
        # Before deciding the pump switches, read the current forebay depth;
        # pumps must stay off when the level is too low.
        self.pool_d = get_output.depth(self.infile + '.out', self.pool_list,
                                       self.date_t[1])
        action_seq = []
        log_reward = ''
        # One storm drives this episode; batch_size storms' .inp files are
        # generated at random elsewhere.
        for i in range(1, len(self.date_t) - 1):
            rain_sum = sum(
                rainData[self.date_t[i]:self.date_t[i + 1]]) / max(rainData)
            action = []
            for pool in self.pool_list:
                observation = [
                    outflow / (0.001 + total_in),
                    flooding / (0.001 + total_in),
                    store / (0.001 + total_in),
                    self.pool_d[pool], rain_sum
                ]
                x = np.reshape(observation, [1, self.D])
                tfprob = self.sess.run(self.probability,
                                       feed_dict={self.observations: x})
                # flage encodes the water-level band:
                #  0 -> between minimum start level and maximum level
                # -1 -> below minimum start level (pumps must stay off)
                #  1 -> otherwise (at/above maximum level)
                if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
                    flage = 0
                elif self.pool_d[pool] < self.limit_level[pool][0]:
                    flage = -1
                else:
                    flage = 1
                # Pump start/stop policy.
                if flage == 0:
                    if tfprob < self.action_space[1] + 0.1:
                        # probability below ~0.1: keep both pumps off
                        action.append(0)
                        action.append(0)
                        a = 0
                    elif tfprob < self.action_space[1] + 0.6:
                        # probability roughly in 0.1-0.6: run one pump
                        action.append(0)
                        action.append(1)
                        a = 1
                    else:
                        # run both pumps
                        action.append(1)
                        action.append(1)
                        a = 1
                elif flage == -1:
                    action.append(0)
                    action.append(0)
                    a = 0
                else:
                    # flage == 1: run everything
                    action.append(1)
                    action.append(1)
                    a = 1
                xs.append(x)
                y = 1 - a
                ys.append(y)
            # The reward only exists after the pumps are set and the model
            # is re-run.
            action_seq.append(action)
            etime = self.date_time[i + 1]
            set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                                  self.startfile + '.inp')
            change_rain.copy_result(self.infile + '.inp',
                                    self.startfile + '.inp')
            set_pump.set_pump(action_seq, self.date_time[1:i + 1], pumps,
                              self.infile + '.inp')
            self.simulation(self.infile + '.inp')
            # Read the report to produce the next action.
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                self.infile + '.rpt')
            self.pool_d = get_output.depth(self.infile + '.out',
                                           self.pool_list,
                                           self.date_t[i] - i)
            # Reward: currently only overflow (flooding) control counts. The
            # GA flooding series is the per-step baseline.
            for pool in self.pool_list:
                reward_sum += (self.GA_flooding_step[i] - flooding) / (0.0001 + total_in)
                drs.append((self.GA_flooding_step[i] - flooding) / (0.0001 + total_in))
            log_reward += str(reward_sum) + '\n'
        end = datetime.datetime.now()
        print(iten, ' ', end - begin)
        with open('reward' + str(iten) + '.txt', 'w') as f:
            f.write(log_reward)
        # Archive the .inp and .rpt files for this storm.
        change_rain.copy_result(
            './sim/test/result/ai/inp/' + str(iten) + '.inp',
            './sim/test/oti.inp')
        change_rain.copy_result(
            './sim/test/result/ai/rpt/' + str(iten) + '.rpt',
            './sim/test/oti.rpt')
        print("action sequence:", action_seq)
        print("score:", reward_sum)
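# The per-step reward used in test() (and in train() below), isolated for
# clarity; the function name is hypothetical. The GA flooding series is the
# baseline: the reward is positive exactly when the agent floods less than
# the GA schedule did at the same step, scaled by total inflow.
def ga_baseline_reward(ga_flooding_step, i, flooding, total_in):
    return (ga_flooding_step[i] - flooding) / (0.0001 + total_in)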
def train(self):
    xs, ys, drs = [], [], []
    init = tf.global_variables_initializer()
    self.sess.run(init)
    gradBuffer = self.sess.run(self.tvars)
    for ix, grad in enumerate(gradBuffer):
        gradBuffer[ix] = grad * 0
    etime = self.date_time[1]
    episode_number = 0
    print(len(self.trainRainData))
    pumps = []
    for pool in self.pool_list:  # each pool is a forebay
        for item in self.pump_list[pool]:
            pumps.append(item)
    while episode_number < len(self.trainRainData):
        reward_sum = 0
        rainData = self.trainRainData[episode_number]
        s1 = datetime.datetime.now()
        # Copy the pristine input file arg-original.inp over the start file
        # (each reset rewrites its rainfall data) and over infile, the
        # working input file used for every simulation.
        change_rain.copy_result(self.startfile + '.inp', 'arg-original.inp')
        change_rain.copy_result(self.infile + '.inp', 'arg-original.inp')
        change_rain.change_rain(rainData, self.startfile + '.inp')
        # Copy the rainfall-modified input file to the GA_tem file.
        change_rain.copy_result(self.GA_tem + '.inp', self.startfile + '.inp')
        # GA comparison: generate the GA pump schedule for this storm and
        # its per-step flooding series, the per-step baseline for the DDQN
        # reward.
        self.GA_action_seq = GA_sim.GA_sim(self.GA_tem, self.GAfile,
                                           self.crossRate, self.mutationRate,
                                           self.lifeCount, self.date_time,
                                           pumps, self.GAStepNum)
        self.GA_flooding_step = GA_get_flooding_step.GA_get_flooding_step(
            self.GA_action_seq, self.GA_tem, self.GAStepfile,
            self.date_time, pumps)
        # Simulate the first 10 minutes.
        set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                              self.startfile + '.inp')
        change_rain.copy_result(self.infile + '.inp', self.startfile + '.inp')
        self.simulation(self.infile + '.inp')
        # Read the report to produce the first action.
        total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
            self.infile + '.rpt')
        self.pool_d = get_output.depth(self.infile + '.out', self.pool_list,
                                       self.date_t[1])
        action_seq = []
        for i in range(1, len(self.date_t) - 1):  # each step of one storm
            rain_sum = sum(
                rainData[self.date_t[i]:self.date_t[i + 1]]) / max(rainData)
            action = []
            for pool in self.pool_list:  # each pool is a forebay
                observation = [
                    outflow / (0.001 + total_in),
                    flooding / (0.001 + total_in),
                    store / (0.001 + total_in),
                    self.pool_d[pool], rain_sum
                ]
                x = np.reshape(observation, [1, self.D])
                # Pump on/off probability from the policy network.
                tfprob = self.sess.run(self.probability,
                                       feed_dict={self.observations: x})
                # flage encodes the water-level band (see test()).
                if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
                    flage = 0
                elif self.pool_d[pool] < self.limit_level[pool][0]:
                    flage = -1
                else:
                    flage = 1
                # Pump start/stop policy.
                if flage == 0:
                    if tfprob < self.action_space[1] + 0.1:
                        # probability below ~0.1: keep both pumps off
                        action.append(0)
                        action.append(0)
                        a = 0
                    elif tfprob < self.action_space[1] + 0.6:
                        # probability roughly in 0.1-0.6: run one pump
                        action.append(0)
                        action.append(1)
                        a = 1
                    else:
                        # run both pumps
                        action.append(1)
                        action.append(1)
                        a = 1
                elif flage == -1:
                    # below the minimum start level: pumps off
                    action.append(0)
                    action.append(0)
                    a = 0
                else:
                    # flage == 1: above the maximum level, run everything
                    action.append(1)
                    action.append(1)
                    a = 1
                xs.append(x)
                y = 1 - a
                ys.append(y)
            # Append the actions of all pumps for this step.
            action_seq.append(action)
            etime = self.date_time[i + 1]
            set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                                  self.startfile + '.inp')
            change_rain.copy_result(self.infile + '.inp',
                                    self.startfile + '.inp')
            set_pump.set_pump(action_seq, self.date_time[1:i + 1], pumps,
                              self.infile + '.inp')
            self.simulation(self.infile + '.inp')
            # Read the current flooding from the report to produce the next
            # action.
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                self.infile + '.rpt')
            # Before deciding the pump switches, read the current forebay
            # depth; pumps must stay off when the level is too low.
            self.pool_d = get_output.depth(self.infile + '.out',
                                           self.pool_list,
                                           self.date_t[i] - i)
            # Reward: currently only overflow (flooding) control counts. The
            # GA flooding series is the per-step baseline.
            for pool in self.pool_list:
                reward_sum += (self.GA_flooding_step[i] - flooding) / (0.0001 + total_in)
                drs.append((self.GA_flooding_step[i] - flooding) / (0.0001 + total_in))
        episode_number += 1
        # One storm finished: stack its trajectory and update the agent.
        epx = np.vstack(xs)
        epy = np.vstack(ys)
        epr = np.vstack(drs)
        xs, ys, drs = [], [], []
        discounted_epr = self.discount_reward(epr)
        discounted_epr -= np.mean(discounted_epr)
        discounted_epr /= np.std(discounted_epr)
        tGrad = self.sess.run(self.newGrads,
                              feed_dict={
                                  self.observations: epx,
                                  self.input_y: epy,
                                  self.advantages: discounted_epr
                              })
        for ix, grad in enumerate(tGrad):
            gradBuffer[ix] += grad
        # Once a full batch of episode gradients has accumulated, apply it.
        if episode_number % self.batch_size == 0:
            self.sess.run(self.updateGrads,
                          feed_dict={
                              self.W1Grad: gradBuffer[0],
                              self.W2Grad: gradBuffer[1]
                          })
            # Note: reward_sum holds only the most recent episode's
            # cumulative reward, as in the original code.
            print('Average reward for %d: %f.' %
                  (episode_number, reward_sum / self.batch_size))
            for ix, grad in enumerate(gradBuffer):
                gradBuffer[ix] = grad * 0
        s2 = datetime.datetime.now()
        print(s2 - s1)
    print("training done")
    saver = tf.train.Saver()
    sp = saver.save(self.sess, "./save/model.ckpt")
    print("model saved:", sp)
    return drs
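# train() calls self.discount_reward, which is not part of this excerpt.
# A conventional discounted-return implementation, consistent with the
# (N, 1) epr array it receives, would look like this; the discount factor
# gamma is an assumption, not a value taken from the source:
import numpy as np

def discount_reward(self, r, gamma=0.99):
    """Compute discounted returns over the per-step rewards r."""
    discounted = np.zeros_like(r, dtype=np.float64)
    running = 0.0
    for t in reversed(range(r.shape[0])):
        running = running * gamma + r[t, 0]
        discounted[t, 0] = running
    return discounted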
def step(self, a, raindata):
    # Advance statf's date one step (iten moves forward each call), apply
    # the action, run the simulation, and store the results.
    self.iten += 1
    action = []
    pumps = []
    for pool in self.pool_list:
        # Check the forebay level; it gates the pump switches.
        if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
            flage = 0
        elif self.pool_d[pool] < self.limit_level[pool][0]:
            flage = -1
        else:
            flage = 1
        # Pump start/stop policy.
        if flage == 0:
            if a < self.action_space[1] + 0.1:
                action.append(0)
                action.append(0)
            elif a < self.action_space[1] + 0.6:
                action.append(0)
                action.append(1)
            else:
                action.append(1)
                action.append(1)
        elif flage == -1:
            action.append(0)
            action.append(0)
        else:
            action.append(1)
            action.append(1)
        for item in self.pump_list[pool]:
            pumps.append(item)
    # The reward only exists after the pumps are set and the model is run.
    self.action_seq.append(action)
    set_pump.set_pump(self.action_seq, self.date_time[1:self.iten], pumps,
                      self.orftem + '.inp')
    tem_etime = self.date_time[self.iten]
    set_datetime.set_date(self.sdate, self.edate, self.stime, tem_etime,
                          self.orftem + '.inp')
    # Copy orftem's .inp to the staf file, then restore orftem to the
    # rainfall-only input file.
    change_rain.copy_result(self.staf + '.inp', self.orftem + '.inp')
    change_rain.copy_result(self.orftem + '.inp', self.orf_rain + '.inp')
    # Step forward.
    self.simulation(self.staf + '.inp')
    # Read state from the .out and .rpt files; when iten reaches the final
    # time step the episode stops.
    total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
        self.staf + '.rpt')
    # Forebay depth at the current (final) time; pumps stay off when the
    # level is too low.
    self.pool_d = get_output.depth(self.staf + '.out', self.pool_list,
                                   self.date_t[self.iten] - self.iten)
    rain_sum = sum(
        raindata[self.date_t[self.iten]:self.date_t[self.iten + 1]]) / max(raindata)
    for pool in self.pool_list:
        state = np.array([
            outflow / (0.001 + total_in), flooding / (0.001 + total_in),
            store / (0.001 + total_in), self.pool_d[pool], rain_sum
        ])
    # Reward: currently only overflow (flooding) control counts. Two
    # alternative rewards were tried and are kept for reference:
    #
    # reward 1 -- fixed penalty when flooding exceeds 10% of inflow:
    #   reward_sum = 0
    #   for pool in self.pool_list:
    #       if flooding > total_in * 0.1:
    #           reward_sum += -1.0
    #       else:
    #           reward_sum += 1.0
    #
    # reward 2 -- relative to a fixed comparison run used as the baseline
    # (simulate 'compare_tem_HC.inp' with the same window, then read
    # flooding_compare from its .rpt):
    #   reward_sum = 0
    #   for pool in self.pool_list:
    #       if flooding_compare != 0.0:
    #           reward_sum += (flooding_compare - flooding) / flooding_compare
    #       else:
    #           reward_sum += -flooding / (0.0001 + total_in)
    #
    # Current reward: compare against the GA schedule's per-step flooding.
    reward_sum = 0
    for pool in self.pool_list:
        reward_sum += (self.GA_flooding_step[self.iten] - flooding) / (0.0001 + total_in)
    done = (self.iten == self.T - 2)
    return state, reward_sum, done, {}
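# Hedged usage sketch for step(): the env and agent objects here are
# assumptions, since the environment's constructor and reset() are not part
# of this excerpt. reset() is presumed to rebuild the .inp files, run
# GA_sim for the storm, and store self.GA_flooding_step before stepping.
def rollout(env, agent, raindata):
    state = env.reset(raindata)        # assumed interface
    total_reward, done = 0.0, False
    while not done:
        a = agent.act(state)           # scalar pump probability in [0, 1]
        state, reward, done, _ = env.step(a, raindata)
        total_reward += reward
    return total_reward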
def GA_sim(startfile, simfile, crossRate, mutationRate, lifeCount, date_time,
           pumps, stepNum):
    # Evolve a pump schedule (a 0/1 matrix of shape
    # (len(date_time), len(pumps))) that minimizes total flooding.
    iten = 1  # index used when archiving the result files
    # Copy the rainfall-modified start file over the working simfile.
    change_rain.copy_result(simfile + '.inp', startfile + '.inp')
    begin = datetime.datetime.now()
    # Initialization: random population, one gene bit per (step, pump).
    lives = initPopulation(lifeCount, len(date_time) * len(pumps))
    scores = []
    bounds = 0
    generation = 0
    for gene in lives:
        tem = np.array(gene)
        # e.g. a 25 x 8 schedule for 25 steps and 8 pumps
        action_seq = list(tem.reshape(len(date_time), len(pumps)))
        change_rain.copy_result(simfile + '.inp', startfile + '.inp')
        set_pump.set_pump(action_seq, date_time[0:len(date_time) - 1], pumps,
                          simfile + '.inp')
        simulation(simfile + '.inp')
        total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
            simfile + '.rpt')
        # Fitness: less flooding scores higher.
        score = 1 / (1 + flooding)
        scores.append(score)
        bounds += score
    best = lives[scores.index(max(scores))]
    # End of initialization.
    begin = datetime.datetime.now()
    for i in range(stepNum):
        # Breed the next generation, then evaluate every individual.
        newLives = [best]  # elitism: carry the best individual over
        while len(newLives) < lifeCount:
            newLives.append(
                newChild(crossRate, mutationRate, lives, scores, bounds))
        lives = newLives
        generation += 1
        scores = []
        bounds = 0
        for gene in lives:
            tem = np.array(gene)
            action_seq = list(tem.reshape(len(date_time), len(pumps)))
            change_rain.copy_result(simfile + '.inp', startfile + '.inp')
            set_pump.set_pump(action_seq, date_time[0:len(date_time) - 1],
                              pumps, simfile + '.inp')
            simulation(simfile + '.inp')
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                simfile + '.rpt')
            score = 1 / (1 + flooding)
            scores.append(score)
            bounds += score
        best = lives[scores.index(max(scores))]
        end = datetime.datetime.now()
    # Simulate the best schedule once more so the archived files match it.
    tem = np.array(best)
    action_seq = tem.reshape(len(date_time), len(pumps))
    change_rain.copy_result(simfile + '.inp', startfile + '.inp')
    set_pump.set_pump(action_seq, date_time[0:len(date_time) - 1], pumps,
                      simfile + '.inp')
    simulation(simfile + '.inp')
    total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
        simfile + '.rpt')
    score = 1 / (1 + flooding)
    end = datetime.datetime.now()
    # Archive the .inp and .rpt files used for this GA run.
    change_rain.copy_result('./sim/GA/GA_' + str(iten) + '.inp',
                            simfile + '.inp')
    change_rain.copy_result('./sim/GA/GA_' + str(iten) + '.rpt',
                            simfile + '.rpt')
    return action_seq
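# GA_sim() relies on initPopulation and newChild, which are not in this
# excerpt; the signatures below are inferred from the call sites, and the
# internals are a standard roulette-wheel/crossover/mutation sketch rather
# than the original implementation.
import random

def initPopulation(life_count, gene_length):
    """Random 0/1 genomes: one bit per (time step, pump) pair."""
    return [[random.randint(0, 1) for _ in range(gene_length)]
            for _ in range(life_count)]

def newChild(cross_rate, mutation_rate, lives, scores, bounds):
    """Roulette-wheel selection on fitness (bounds is the fitness sum, as
    accumulated in GA_sim), then one-point crossover and bit-flip mutation."""
    def select():
        r = random.uniform(0, bounds)
        for gene, score in zip(lives, scores):
            r -= score
            if r <= 0:
                return gene
        return lives[-1]

    father, mother = select(), select()
    child = list(father)
    if random.random() < cross_rate:
        point = random.randint(1, len(child) - 1)
        child = father[:point] + mother[point:]
    for k in range(len(child)):
        if random.random() < mutation_rate:
            child[k] = 1 - child[k]
    return child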
def step(self, a, rainData):
    # Simulate the rainfall one step at a time: advance statf's date (iten
    # moves forward each call), apply the action, run the simulation, and
    # store the results.
    action = []
    pumps = []
    self.iten += 1
    for pool in self.pool_list:
        # Check the forebay level; it gates the pump switches.
        if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
            # between the minimum start level and the maximum level
            flage = 0
        elif self.pool_d[pool] < self.limit_level[pool][0]:
            # below the minimum start level
            flage = -1
        else:
            flage = 1
        # Pump start/stop policy.
        if flage == 0:
            if a < self.action_space[1] + 0.1:
                # probability below ~0.1: keep both pumps off
                action.append(0)
                action.append(0)
            elif a < self.action_space[1] + 0.6:
                # probability roughly in 0.1-0.6: run one pump
                action.append(0)
                action.append(1)
            else:
                # run both pumps
                action.append(1)
                action.append(1)
        elif flage == -1:
            action.append(0)
            action.append(0)
        else:
            # flage == 1: run everything
            action.append(1)
            action.append(1)
        for item in self.pump_list[pool]:
            pumps.append(item)
    # The reward only exists after the pumps are set and the model is run.
    self.action_seq.append(action)
    # Write the pump schedule into orftem's .inp file.
    set_pump.set_pump(self.action_seq, self.date_time[1:self.iten], pumps,
                      self.orftem + '.inp')
    tem_etime = self.date_time[self.iten]
    # After the pump schedule, set the simulation window.
    set_datetime.set_date(self.sdate, self.edate, self.stime, tem_etime,
                          self.orftem + '.inp')
    # Copy orftem's .inp to the staf file, then restore orftem to the
    # rainfall-only input file.
    change_rain.copy_result(self.staf + '.inp', self.orftem + '.inp')
    change_rain.copy_result(self.orftem + '.inp', self.orf_rain + '.inp')
    self.simulation(self.staf + '.inp')
    # Read state from the .out and .rpt files; when iten reaches the final
    # time step the episode stops.
    total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
        self.staf + '.rpt')
    # Forebay depth at the current (final) time; pumps stay off when the
    # level is too low.
    self.pool_d = get_output.depth(self.staf + '.out', self.pool_list,
                                   self.date_t[self.iten] - self.iten)
    rain_sum = sum(
        rainData[self.date_t[self.iten]:self.date_t[self.iten + 1]]) / max(rainData)
    for pool in self.pool_list:
        state = np.array([
            outflow / (0.001 + total_in), flooding / (0.001 + total_in),
            store / (0.001 + total_in), self.pool_d[pool], rain_sum
        ])
    # Reward: currently only overflow (flooding) control counts. The GA runs
    # once per storm; its schedule is replayed to extract per-step flooding,
    # which serves as the baseline here. Two earlier rewards (a fixed
    # penalty when flooding exceeds 10% of inflow, and a comparison against
    # the fixed 'compare_tem_HC.inp' run) are kept for reference in the
    # first step() variant above.
    reward_sum = 0
    for pool in self.pool_list:
        reward_sum += (self.GA_flooding_step[self.iten] - flooding) / (0.0001 + total_in)
    done = (self.iten == self.T - 2)
    return state, reward_sum, done, {}
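# Everything above unpacks get_rpt.get_rpt as the six-tuple
# (total_in, flooding, store, outflow, upflow, downflow). That module is
# not part of this excerpt. The sketch below shows one plausible reading of
# a SWMM 5 report's Flow Routing Continuity table; the row labels are
# assumptions that vary across SWMM versions, and the upflow/downflow
# fields cannot be identified from this excerpt, so they are left as None.
import re

def get_rpt_sketch(rpt_path):
    totals = {}
    with open(rpt_path) as f:
        for line in f:
            m = re.match(r'\s*([A-Za-z ]+?)\s*\.{2,}\s*([-\d.]+)', line)
            if m:
                totals[m.group(1).strip()] = float(m.group(2))
    total_in = (totals.get('Dry Weather Inflow', 0.0)
                + totals.get('Wet Weather Inflow', 0.0))
    flooding = totals.get('Flooding Loss', 0.0)
    store = totals.get('Final Stored Volume', 0.0)
    outflow = totals.get('External Outflow', 0.0)
    return total_in, flooding, store, outflow, None, None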