def GA_get_flooding_step(action_seq, startfile, floodStepFile, date_time, pumps):
    # Re-simulate the GA action sequence step by step and record the cumulative
    # flooding reported after each step.
    action_step = []
    flooding_step = []
    for i in range(0, action_seq.shape[0]):
        if i == 0:
            action_step.append(action_seq[0])
            action_step.append(action_seq[1])
            change_rain.copy_result(floodStepFile + '.inp', startfile + '.inp')
            set_pump.set_pump(action_seq, date_time[0:2], pumps, floodStepFile + '.inp')
            simulation(floodStepFile + '.inp')
            _, flooding, _, _, _, _ = get_rpt.get_rpt(floodStepFile + '.rpt')
            flooding_step.append(flooding)
        if i > 1:
            action_step.append(action_seq[i])
            change_rain.copy_result(floodStepFile + '.inp', startfile + '.inp')
            set_pump.set_pump(action_seq, date_time[0:i + 1], pumps, floodStepFile + '.inp')
            simulation(floodStepFile + '.inp')
            _, flooding, _, _, _, _ = get_rpt.get_rpt(floodStepFile + '.rpt')
            flooding_step.append(flooding)
    return flooding_step
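A minimal usage sketch for GA_get_flooding_step, assuming the GA best sequence is a NumPy 0/1 array of shape (steps, n_pumps) and that './sim/GA_tem.inp' already contains the current rainfall; the array values, time stamps, and pump names below are illustrative only.

import numpy as np

best_seq = np.array([[1, 0], [1, 1], [0, 0]])     # hypothetical 3-step plan for 2 pumps
date_time = ['08:00', '08:10', '08:20', '08:30']  # control-step clock times
pumps = ['CC-Pump-1', 'CC-Pump-2']

flooding_step = GA_get_flooding_step(best_seq, './sim/GA_tem', './sim/GAStepfile',
                                     date_time, pumps)
print(flooding_step)  # cumulative flooding after each simulated prefix of the plan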
def test(self, test_num):
    """Test method: run the trained DDQN agent on the test rainfall events."""
    saver = tf.train.Saver()
    saver.restore(self.sess, "./save/model.ckpt")
    dr = []
    for i in range(test_num):
        acc_r = [0]
        observation = self.env.reset(self.testRainData[i])
        while True:
            action = self.choose_action(observation)
            # map the discrete action index to a float action in roughly [-2, 2]
            f_action = (action - (self.n_actions - 1) / 2) / (self.n_actions / 4)
            observation_, reward, done, info = self.env.step(
                np.array([f_action]), self.testRainData[i])
            reward /= 10  # normalize to a range of (-1, 0)
            acc_r.append(reward + acc_r[-1])  # accumulated reward
            observation = observation_
            if done:
                break
        dr.append(acc_r)
        # HC comparison: restore the rain-only input, set its time window, and simulate
        change_rain.copy_result('./test_result/HC/compare_tem_HC' + str(i) + '.inp',
                                self.env.orf_rain + '.inp')
        tem_etime = self.env.date_time[self.env.iten]
        set_datetime.set_date(self.env.sdate, self.env.edate, self.env.stime, tem_etime,
                              './test_result/HC/compare_tem_HC' + str(i) + '.inp')
        self.env.simulation('./test_result/HC/compare_tem_HC' + str(i) + '.inp')
        # save the DDQN report for this event
        sout = './test_result/DDQN/DDQN_' + str(i) + '.rpt'
        sin = self.env.staf + '.rpt'
        change_rain.copy_result(sout, sin)
    return dr
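A quick check of the action discretization above: the index-to-float mapping centers the action range on zero and scales it to roughly [-2, 2]. The sketch below assumes n_actions = 11 purely for illustration; the real setting is not shown in this excerpt.

n_actions = 11  # illustrative value only
for action in (0, (n_actions - 1) // 2, n_actions - 1):
    f_action = (action - (n_actions - 1) / 2) / (n_actions / 4)
    print(action, round(f_action, 2))  # 0 -> -1.82, 5 -> 0.0, 10 -> 1.82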
def test(self, test_num):
    """Test method: run the trained actor-critic agent on the test rainfall events."""
    # load the trained model
    saver = tf.train.Saver()
    saver.restore(self.sess, "./save/model.ckpt")
    dr = []
    for i in range(test_num):
        print('test' + str(i))
        s = self.env.reset(self.testRainData[i])
        t = 0
        track_r = []
        while True:
            a = self.actor.choose_action(s)
            s_, r, done, info = self.env.step(a, self.testRainData[i])
            track_r.append(r)
            td_error = self.critic.learn(s, r, s_)  # gradient = grad[r + gamma * V(s_) - V(s)]
            self.actor.learn(s, a, td_error)  # true_gradient = grad[logPi(s,a) * td_error]
            s = s_
            t += 1
            if done:
                dr.append(track_r)
                break
        # HC comparison: restore the rain-only input, set its time window, and simulate
        change_rain.copy_result('./test_result/HC/compare_tem_HC' + str(i) + '.inp',
                                self.env.orf_rain + '.inp')
        tem_etime = self.env.date_time[self.env.iten]
        set_datetime.set_date(self.env.sdate, self.env.edate, self.env.stime, tem_etime,
                              './test_result/HC/compare_tem_HC' + str(i) + '.inp')
        self.env.simulation('./test_result/HC/compare_tem_HC' + str(i) + '.inp')
        # save the report for this event
        sout = './test_result/' + str(i) + '.rpt'
        sin = self.env.staf + '.rpt'
        change_rain.copy_result(sout, sin)
    return dr
def __init__(self, date_time, date_t):
    self.action_space = [1.0, 0.0]
    self.observation_space = 5
    self.orf = './sim/orf'  # original inp file, containing only the pipe-network data
    self.orf_rain = './sim/orf_rain'  # orf plus the current rainfall
    self.staf = './sim/staf'  # inp file used for simulation: orf with updated time and rainfall
    self.orftem = './sim/orf_tem'  # inp file used for the final simulation
    self.GA_tem = './sim/GA_tem'  # temporary GA inp file
    self.GAfile = './sim/GAfile'  # inp file used by the GA simulation
    self.GAStepfile = './sim/GAStepfile'  # inp file used by the stepwise GA simulation
    change_rain.copy_result(self.staf + '.inp', self.orf + '.inp')
    change_rain.copy_result(self.orf_rain + '.inp', self.orf + '.inp')
    change_rain.copy_result(self.orftem + '.inp', self.orf + '.inp')
    self.date_time = date_time
    self.date_t = date_t
    self.T = len(self.date_t)
    self.deltt = 1
    self.iten = 0  # current simulation time step
    self.action_seq = []
    self.sdate = self.edate = '08/28/2015'
    # simulate the first 10 min
    self.stime = date_time[0]
    self.etime = date_time[1]
    # only the CC forebay is active; the JK and XR entries below can be re-enabled:
    # 'JK-storage': ['JK-Pump-1', 'JK-Pump-2'],
    # 'XR-storage': ['XR-Pump-1', 'XR-Pump-2', 'XR-Pump-3', 'XR-Pump-4']
    self.pump_list = {'CC-storage': ['CC-Pump-1', 'CC-Pump-2']}
    # 'JK-storage': [0.9, 3.02, 4.08], 'XR-storage': [0.9, 1.26, 1.43, 1.61, 1.7]
    self.limit_level = {'CC-storage': [0.9, 3.02, 4.08]}
    # 'JK-storage': 4.8, 'XR-storage': 7.72
    self.max_depth = {'CC-storage': 5.6}
    self.pool_list = ['CC-storage']  # , 'JK-storage', 'XR-storage'
    self.rain = []
    self.pool_d = []
    self.crossRate = 0.7
    self.mutationRate = 0.02
    self.lifeCount = 10
    self.GA_action_seq = []
    self.GAStepNum = 1
    self.GA_flooding_step = []
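A hypothetical construction of this environment, assuming the class is named Env (the real class name is not shown in this excerpt), with date_time holding the clock strings of the 10-min control steps and date_t the matching indices into the rainfall series; the values are illustrative only.

date_time = ['08:00', '08:10', '08:20', '08:30']  # illustrative control-step times
date_t = [0, 10, 20, 30]                          # illustrative rainfall-series indices
env = Env(date_time, date_t)                      # Env is a placeholder class name
print(env.T, env.pool_list)                       # 4 ['CC-storage']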
def reset(self, raindata):
    # Each reset assigns a new rainfall event and starts a fresh pump sequence.
    change_rain.change_rain(raindata, self.orf_rain + '.inp')  # update orf_rain first, then copy to staf
    change_rain.copy_result(self.staf + '.inp', self.orf_rain + '.inp')
    change_rain.copy_result(self.orftem + '.inp', self.orf_rain + '.inp')
    # copy the rain-updated orf_rain.inp to the GA input file
    change_rain.copy_result(self.GA_tem + '.inp', self.orf_rain + '.inp')
    self.iten = 1
    self.action_seq = []
    tem_etime = self.date_time[self.iten]
    set_datetime.set_date(self.sdate, self.edate, self.stime, tem_etime, self.staf + '.inp')
    # collect all pumps
    pumps = []
    for pool in self.pool_list:
        for item in self.pump_list[pool]:
            pumps.append(item)
    # GA baseline: generate the GA strategy and its per-step flooding over the rainfall
    # period; the flooding series is the benchmark for every DDQN simulation step
    self.GA_action_seq = GA_sim.GA_sim(self.GA_tem, self.GAfile, self.crossRate,
                                       self.mutationRate, self.lifeCount,
                                       self.date_time, pumps, self.GAStepNum)
    self.GA_flooding_step = GA_get_flooding_step.GA_get_flooding_step(
        self.GA_action_seq, self.GA_tem, self.GAStepfile, self.date_time, pumps)
    self.simulation(self.staf + '.inp')
    total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(self.staf + '.rpt')
    # read the forebay depth at the latest (current) time before deciding pump states;
    # pumps stay off when the level is too low
    self.pool_d = get_output.depth(self.staf + '.out', self.pool_list,
                                   self.date_t[self.iten] - self.iten)
    rain_sum = sum(raindata[self.date_t[self.iten]:self.date_t[self.iten + 1]]) / max(raindata)
    for pool in self.pool_list:
        state = np.array([outflow / total_in, flooding / total_in, store / total_in,
                          self.pool_d[pool], rain_sum])
    return state
# Generate a new rainfall event for each batch and extract the boundary condition.
A = random.randint(50, 150)
C = random.randint(3, 9) / 10.00
P = random.randint(1, 5)
b = 12
n = 0.77
R = random.randint(2, 8) / 10.00
rain = change_rain.gen_rain(date_t[-1], A, C, P, b, n, R, deltt)
bcfile = './boundary condition/otbc'
bcstartfile = './boundary condition/ot'
change_rain.copy_result(bcstartfile + '.inp', bcfile + '.inp')
change_rain.change_rain(rain, bcfile + '.inp')
sdate = edate = '08/28/2015'
stime = date_time[0]
etime = date_time[-1]
set_datetime.set_date(sdate, edate, stime, etime, bcfile + '.inp')
simulation(bcfile + '.inp')
filename = bcfile + '.out'
sub, node, link, sub_name, node_name, link_name = get_output.read_out(filename)
# the boundary-condition series lives in the node results
inflow_data = bc_data(node, 'inflow')
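gen_rain's implementation is not shown here. Judging from the parameter names (A, C, P, b, n and a peak-position ratio R), it plausibly follows the Chicago design-storm formulation common in the drainage literature; the sketch below is an assumption, not the repo's actual code.

import math

def chicago_intensity(t, A, C, P, b, n):
    # assumed storm-intensity formula i(t) = A * (1 + C * lg(P)) / (t + b) ** n,
    # with t in minutes and P the return period; R (not used here) would split
    # the hyetograph into rising and falling limbs around the peak
    return A * (1 + C * math.log10(P)) / (t + b) ** n

print(chicago_intensity(10, 100, 0.7, 2, 12, 0.77))  # intensity 10 min into the storm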
def test(self):
    # Run the trained policy repeatedly over the test rainfall events.
    saver = tf.train.Saver()
    saver.restore(self.sess, "./save/model.ckpt")
    etime = date_time[1]
    xs, ys, drs = [], [], []
    pumps = []
    for pool in self.pool_list:  # pool is a forebay (storage pool)
        for item in self.pump_list[pool]:
            pumps.append(item)
    for iten in range(len(self.testRainData)):
        reward_sum = 0
        # copy the pristine input arg-original.inp to the start file; each event
        # rewrites its rainfall data
        change_rain.copy_result(self.startfile + '.inp', 'arg-original.inp')
        change_rain.copy_result(self.infile + '.inp', 'arg-original.inp')
        rainData = self.testRainData[iten]
        change_rain.change_rain(rainData, self.startfile + '.inp')
        self.simulation(self.infile + '.inp')
        # copy the rain-updated start file to the GA input file
        change_rain.copy_result(self.GA_tem + '.inp', self.startfile + '.inp')
        # GA baseline: generate the GA strategy and its per-step flooding over the
        # rainfall period, the benchmark for every DDQN simulation step
        self.GA_action_seq = GA_sim.GA_sim(self.GA_tem, self.GAfile, self.crossRate,
                                           self.mutationRate, self.lifeCount,
                                           self.date_time, pumps, self.GAStepNum)
        self.GA_flooding_step = GA_get_flooding_step.GA_get_flooding_step(
            self.GA_action_seq, self.GA_tem, self.GAStepfile, self.date_time, pumps)
        begin = datetime.datetime.now()
        change_rain.copy_result('./sim/test/result/en/inp/' + str(iten) + '.inp',
                                self.startfile + '.inp')
        change_rain.copy_result('./sim/test/result/en/rpt/' + str(iten) + '.rpt',
                                self.startfile + '.rpt')
        set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                              self.startfile + '.inp')
        change_rain.copy_result(self.infile + '.inp', self.startfile + '.inp')
        self.simulation(self.infile + '.inp')
        # read the report and produce the first action
        total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
            self.infile + '.rpt')
        # read the forebay depth at the latest (current) time before deciding pump
        # states; pumps stay off when the level is too low
        self.pool_d = get_output.depth(self.infile + '.out', self.pool_list, self.date_t[1])
        action_seq = []
        log_reward = ''
        for i in range(1, len(self.date_t) - 1):  # iterate over every step of this event
            rain_sum = sum(rainData[self.date_t[i]:self.date_t[i + 1]]) / max(rainData)
            action = []
            for pool in self.pool_list:
                observation = [outflow / total_in, flooding / total_in,
                               store / total_in, self.pool_d[pool], rain_sum]
                x = np.reshape(observation, [1, self.D])
                tfprob = self.sess.run(self.probability,
                                       feed_dict={self.observations: x})
                # flage encodes the water-level regime: -1 below the minimum pumping
                # level, 0 between the limits, 1 above the maximum level
                if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
                    flage = 0
                elif self.pool_d[pool] < self.limit_level[pool][0]:
                    flage = -1
                else:
                    flage = 1
                # pump on/off policy
                if flage == 0:
                    if tfprob < self.action_space[1] + 0.1:  # probability below 0.1: no pump
                        action.extend([0, 0])
                        a = 0
                    elif tfprob < self.action_space[1] + 0.6:  # probability in [0.1, 0.6): one pump
                        action.extend([0, 1])
                        a = 1
                    else:  # two pumps
                        action.extend([1, 1])
                        a = 1
                elif flage == -1:  # below the minimum level: no pump
                    action.extend([0, 0])
                    a = 0
                else:  # above the maximum level: all pumps on
                    action.extend([1, 1])
                    a = 1
                xs.append(x)
                y = 1 - a
                ys.append(y)
            # the reward only exists after the pumps are set and the model re-simulated
            action_seq.append(action)
            etime = self.date_time[i + 1]
            set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                                  self.startfile + '.inp')
            change_rain.copy_result(self.infile + '.inp', self.startfile + '.inp')
            set_pump.set_pump(action_seq, self.date_time[1:i + 1], pumps,
                              self.infile + '.inp')
            self.simulation(self.infile + '.inp')
            # read the report and produce the next action
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                self.infile + '.rpt')
            # forebay depth at the current time, read before the next pump decision
            self.pool_d = get_output.depth(self.infile + '.out', self.pool_list,
                                           self.date_t[i] - i)
            # reward: only overflow (flooding) control is considered; the GA flooding
            # series is the per-step benchmark
            for pool in self.pool_list:
                reward_sum += (self.GA_flooding_step[i] - flooding) / (0.0001 + total_in)
                drs.append((self.GA_flooding_step[i] - flooding) / (0.0001 + total_in))
            log_reward += str(reward_sum) + '\n'
        end = datetime.datetime.now()
        print(iten, ' ', end - begin)
        f = open('reward' + str(iten) + '.txt', 'w')
        f.write(log_reward)
        f.close()
        # save the inp and rpt files
        change_rain.copy_result('./sim/test/result/ai/inp/' + str(iten) + '.inp',
                                './sim/test/oti.inp')
        change_rain.copy_result('./sim/test/result/ai/rpt/' + str(iten) + '.rpt',
                                './sim/test/oti.rpt')
        print("control sequence:", action_seq)
        print("score:", reward_sum)
def train(self):
    xs, ys, drs = [], [], []
    init = tf.global_variables_initializer()
    self.sess.run(init)
    gradBuffer = self.sess.run(self.tvars)
    for ix, grad in enumerate(gradBuffer):
        gradBuffer[ix] = grad * 0
    etime = date_time[1]
    episode_number = 0
    print(len(self.trainRainData))
    pumps = []
    for pool in self.pool_list:  # pool is a forebay (storage pool)
        for item in self.pump_list[pool]:
            pumps.append(item)
    while episode_number < len(self.trainRainData):
        reward_sum = 0
        rainData = self.trainRainData[episode_number]
        s1 = datetime.datetime.now()
        # copy the pristine input arg-original.inp to the start file; each episode
        # rewrites its rainfall data
        change_rain.copy_result(self.startfile + '.inp', 'arg-original.inp')
        # initialize infile, the input file used for every simulation
        change_rain.copy_result(self.infile + '.inp', 'arg-original.inp')
        # write this episode's rainfall into the start inp file
        change_rain.change_rain(rainData, self.startfile + '.inp')
        # copy the rain-updated input file to GA_tem
        change_rain.copy_result(self.GA_tem + '.inp', self.startfile + '.inp')
        # GA baseline: generate the GA strategy and its per-step flooding over the
        # rainfall period, the benchmark for every DDQN simulation step
        self.GA_action_seq = GA_sim.GA_sim(self.GA_tem, self.GAfile, self.crossRate,
                                           self.mutationRate, self.lifeCount,
                                           self.date_time, pumps, self.GAStepNum)
        self.GA_flooding_step = GA_get_flooding_step.GA_get_flooding_step(
            self.GA_action_seq, self.GA_tem, self.GAStepfile, self.date_time, pumps)
        # simulate the first 10 min
        set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                              self.startfile + '.inp')  # update the time window
        change_rain.copy_result(self.infile + '.inp', self.startfile + '.inp')
        self.simulation(self.infile + '.inp')
        # read the report and produce the first action
        total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
            self.infile + '.rpt')
        self.pool_d = get_output.depth(self.infile + '.out', self.pool_list, self.date_t[1])
        action_seq = []
        for i in range(1, len(self.date_t) - 1):  # iterate over every step of this event
            rain_sum = sum(rainData[self.date_t[i]:self.date_t[i + 1]]) / max(rainData)
            action = []
            for pool in self.pool_list:
                observation = [outflow / (0.001 + total_in), flooding / (0.001 + total_in),
                               store / (0.001 + total_in), self.pool_d[pool], rain_sum]
                x = np.reshape(observation, [1, self.D])
                tfprob = self.sess.run(self.probability,
                                       feed_dict={self.observations: x})  # pump-on probability
                # flage encodes the water-level regime: -1 below the minimum pumping
                # level, 0 between the limits, 1 above the maximum level
                if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
                    flage = 0
                elif self.pool_d[pool] < self.limit_level[pool][0]:
                    flage = -1
                else:
                    flage = 1
                # pump on/off policy
                if flage == 0:
                    if tfprob < self.action_space[1] + 0.1:  # probability below 0.1: no pump
                        action.extend([0, 0])
                        a = 0
                    elif tfprob < self.action_space[1] + 0.6:  # probability in [0.1, 0.6): one pump
                        action.extend([0, 1])
                        a = 1
                    else:  # two pumps
                        action.extend([1, 1])
                        a = 1
                elif flage == -1:  # below the minimum level: no pump
                    action.extend([0, 0])
                    a = 0
                else:  # above the maximum level: all pumps on
                    action.extend([1, 1])
                    a = 1
                xs.append(x)
                y = 1 - a
                ys.append(y)
            # append the actions of all pumps
            action_seq.append(action)
            etime = date_time[i + 1]
            set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                                  self.startfile + '.inp')
            change_rain.copy_result(self.infile + '.inp', self.startfile + '.inp')
            set_pump.set_pump(action_seq, date_time[1:i + 1], pumps, self.infile + '.inp')
            self.simulation(self.infile + '.inp')
            # read the current flooding from the report and produce the next action
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                self.infile + '.rpt')
            # forebay depth at the current time, read before the next pump decision;
            # pumps stay off when the level is too low
            self.pool_d = get_output.depth(self.infile + '.out', self.pool_list,
                                           self.date_t[i] - i)
            # reward: only overflow (flooding) control is considered; the GA flooding
            # series is the per-step benchmark
            for pool in self.pool_list:
                reward_sum += (self.GA_flooding_step[i] - flooding) / (0.0001 + total_in)
                drs.append((self.GA_flooding_step[i] - flooding) / (0.0001 + total_in))
        episode_number += 1
        # one rainfall episode finished (the episode ends when the time runs out):
        # accumulate this episode's gradient
        epx = np.vstack(xs)
        epy = np.vstack(ys)
        epr = np.vstack(drs)
        xs, ys, drs = [], [], []
        discounted_epr = self.discount_reward(epr)
        discounted_epr -= np.mean(discounted_epr)
        discounted_epr /= np.std(discounted_epr)
        tGrad = self.sess.run(self.newGrads, feed_dict={self.observations: epx,
                                                        self.input_y: epy,
                                                        self.advantages: discounted_epr})
        for ix, grad in enumerate(tGrad):
            gradBuffer[ix] += grad
        # once a full batch of rewards has accumulated, update the agent
        if episode_number % self.batch_size == 0:
            self.sess.run(self.updateGrads, feed_dict={self.W1Grad: gradBuffer[0],
                                                       self.W2Grad: gradBuffer[1]})
            print('Average reward for %d:%f.' % (episode_number, reward_sum / self.batch_size))
            for ix, grad in enumerate(gradBuffer):
                gradBuffer[ix] = grad * 0
        s2 = datetime.datetime.now()
        print(s2 - s1)
    print("training done")
    saver = tf.train.Saver()
    sp = saver.save(self.sess, "./save/model.ckpt")
    print("model saved:", sp)
    return drs
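train() calls self.discount_reward, which is not shown in this excerpt. Below is a minimal sketch of a standard discounted-return computation that would fit this policy-gradient update; the default gamma is an assumed value.

import numpy as np

def discount_reward(r, gamma=0.99):
    # discounted cumulative reward over one episode; accepts the (N, 1) column
    # produced by np.vstack(drs) and returns the same shape
    r = np.ravel(np.asarray(r, dtype=np.float64))
    discounted = np.zeros_like(r)
    running_add = 0.0
    for t in reversed(range(r.size)):
        running_add = running_add * gamma + r[t]
        discounted[t] = running_add
    return discounted.reshape(-1, 1)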
def step(self, a, raindata):
    # Advance staf's simulation window one step (iten), apply the action,
    # run the simulation, and store the result.
    self.iten += 1
    action = []
    pumps = []
    for pool in self.pool_list:
        # check the water level; it constrains the pump decision
        if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
            flage = 0
        elif self.pool_d[pool] < self.limit_level[pool][0]:
            flage = -1
        else:
            flage = 1
        # pump on/off policy
        if flage == 0:
            if a < self.action_space[1] + 0.1:
                action.extend([0, 0])
            elif a < self.action_space[1] + 0.6:
                action.extend([0, 1])
            else:
                action.extend([1, 1])
        elif flage == -1:
            action.extend([0, 0])
        else:
            action.extend([1, 1])
        for item in self.pump_list[pool]:
            pumps.append(item)
    # the reward only exists after the pumps are set and the model simulated
    self.action_seq.append(action)
    set_pump.set_pump(self.action_seq, self.date_time[1:self.iten], pumps,
                      self.orftem + '.inp')
    tem_etime = self.date_time[self.iten]
    set_datetime.set_date(self.sdate, self.edate, self.stime, tem_etime,
                          self.orftem + '.inp')
    # restore the cached SWMM inp files
    change_rain.copy_result(self.staf + '.inp', self.orftem + '.inp')
    change_rain.copy_result(self.orftem + '.inp', self.orf_rain + '.inp')
    # step forward
    self.simulation(self.staf + '.inp')
    # read the state from the .out and .rpt files; the episode stops when iten
    # reaches the final time step
    total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
        self.staf + '.rpt')
    # forebay depth at the current time, read before the next pump decision;
    # pumps stay off when the level is too low
    self.pool_d = get_output.depth(self.staf + '.out', self.pool_list,
                                   self.date_t[self.iten] - self.iten)
    rain_sum = sum(raindata[self.date_t[self.iten]:self.date_t[self.iten + 1]]) / max(raindata)
    for pool in self.pool_list:
        state = np.array([outflow / (0.001 + total_in), flooding / (0.001 + total_in),
                          store / (0.001 + total_in), self.pool_d[pool], rain_sum])
    # reward: only overflow (flooding) control is considered; earlier threshold-based
    # and HC-baseline reward variants were dropped in favor of the GA benchmark below
    reward_sum = 0
    for pool in self.pool_list:
        # the GA flooding series is the per-step benchmark for DDQN
        reward_sum += (self.GA_flooding_step[self.iten] - flooding) / (0.0001 + total_in)
    done = (self.iten == self.T - 2)
    return state, reward_sum, done, {}
def GA_sim(startfile, simfile, crossRate, mutationRate, lifeCount, date_time, pumps, stepNum):
    iten = 1
    change_rain.copy_result(simfile + '.inp', startfile + '.inp')  # copy the rain-updated inp file
    begin = datetime.datetime.now()
    # generate the 2-D control-strategy matrix action_seq with the genetic algorithm
    # initialization
    lives = initPopulation(lifeCount, len(date_time) * len(pumps))
    scores = []
    bounds = 0
    generation = 0
    for gene in lives:
        tem = np.array(gene)
        action_seq = list(tem.reshape(len(date_time), len(pumps)))  # e.g. a 25 x 8 array
        change_rain.copy_result(simfile + '.inp', startfile + '.inp')
        set_pump.set_pump(action_seq, date_time[0:len(date_time) - 1], pumps, simfile + '.inp')
        simulation(simfile + '.inp')
        total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(simfile + '.rpt')
        score = 1 / (1 + flooding)
        scores.append(score)
        bounds += score
    best = lives[scores.index(max(scores))]
    # end of initialization
    for i in range(stepNum):
        # evaluate: compute the fitness of every individual
        newLives = [best]  # elitism: carry the best individual into the next generation
        while len(newLives) < lifeCount:
            newLives.append(newChild(crossRate, mutationRate, lives, scores, bounds))
        lives = newLives
        generation += 1
        scores = []
        bounds = 0
        for gene in lives:
            tem = np.array(gene)
            action_seq = list(tem.reshape(len(date_time), len(pumps)))
            change_rain.copy_result(simfile + '.inp', startfile + '.inp')
            set_pump.set_pump(action_seq, date_time[0:len(date_time) - 1], pumps, simfile + '.inp')
            simulation(simfile + '.inp')
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(simfile + '.rpt')
            score = 1 / (1 + flooding)
            scores.append(score)
            bounds += score
        best = lives[scores.index(max(scores))]
        max_scores = max(scores)
        end = datetime.datetime.now()
    # simulate the best strategy
    tem = np.array(best)
    action_seq = tem.reshape(len(date_time), len(pumps))
    change_rain.copy_result(simfile + '.inp', startfile + '.inp')
    set_pump.set_pump(action_seq, date_time[0:len(date_time) - 1], pumps, simfile + '.inp')
    simulation(simfile + '.inp')
    total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(simfile + '.rpt')
    score = 1 / (1 + flooding)
    end = datetime.datetime.now()
    # store the GA inp and rpt files used in each simulation
    copy_result('./sim/GA/GA_' + str(iten) + '.inp', simfile + '.inp')
    copy_result('./sim/GA/GA_' + str(iten) + '.rpt', simfile + '.rpt')
    return action_seq
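initPopulation and newChild are referenced above but not defined in this excerpt. A plausible minimal sketch follows, assuming binary genes and roulette-wheel selection with scores summing to bounds, exactly as GA_sim uses them; the real implementations may differ.

import random

def initPopulation(lifeCount, geneLength):
    # random 0/1 genes, one bit per pump per control step
    return [[random.randint(0, 1) for _ in range(geneLength)] for _ in range(lifeCount)]

def newChild(crossRate, mutationRate, lives, scores, bounds):
    # roulette-wheel selection of two parents, one-point crossover, bit-flip mutation
    def select():
        r = random.uniform(0, bounds)
        for gene, score in zip(lives, scores):
            r -= score
            if r <= 0:
                return gene
        return lives[-1]
    father, mother = select(), select()
    if random.random() < crossRate:
        point = random.randint(1, len(father) - 1)
        child = father[:point] + mother[point:]
    else:
        child = list(father)
    for i in range(len(child)):
        if random.random() < mutationRate:
            child[i] = 1 - child[i]
    return child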
def step(self, a, rainData):
    # Stepwise simulation of the rainfall: advance staf's simulation window one
    # step (iten), apply the action, run the simulation, and store the result.
    action = []
    pumps = []
    self.iten += 1
    for pool in self.pool_list:
        # check the water level; it constrains the pump decision
        if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
            flage = 0  # between the minimum pumping level and the maximum level
        elif self.pool_d[pool] < self.limit_level[pool][0]:
            flage = -1  # below the minimum pumping level
        else:
            flage = 1
        # pump on/off policy
        if flage == 0:
            if a < self.action_space[1] + 0.1:  # probability below 0.1: no pump
                action.extend([0, 0])
            elif a < self.action_space[1] + 0.6:  # probability in [0.1, 0.6): one pump
                action.extend([0, 1])
            else:  # two pumps
                action.extend([1, 1])
        elif flage == -1:
            action.extend([0, 0])
        else:  # above the maximum level: all pumps on
            action.extend([1, 1])
        for item in self.pump_list[pool]:
            pumps.append(item)
    # the reward only exists after the pumps are set and the model simulated
    self.action_seq.append(action)
    # write the pump strategy into orftem.inp, then update its simulation window
    set_pump.set_pump(self.action_seq, self.date_time[1:self.iten], pumps,
                      self.orftem + '.inp')
    tem_etime = self.date_time[self.iten]
    set_datetime.set_date(self.sdate, self.edate, self.stime, tem_etime,
                          self.orftem + '.inp')
    # copy orftem.inp to the staf file, then restore orftem to the rain-only file
    change_rain.copy_result(self.staf + '.inp', self.orftem + '.inp')
    change_rain.copy_result(self.orftem + '.inp', self.orf_rain + '.inp')
    self.simulation(self.staf + '.inp')
    # read the state from the .out and .rpt files; the episode stops when iten
    # reaches the final time step
    total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
        self.staf + '.rpt')
    # forebay depth at the current time, read before the next pump decision;
    # pumps stay off when the level is too low
    self.pool_d = get_output.depth(self.staf + '.out', self.pool_list,
                                   self.date_t[self.iten] - self.iten)
    rain_sum = sum(rainData[self.date_t[self.iten]:self.date_t[self.iten + 1]]) / max(rainData)
    for pool in self.pool_list:
        state = np.array([outflow / (0.001 + total_in), flooding / (0.001 + total_in),
                          store / (0.001 + total_in), self.pool_d[pool], rain_sum])
    # reward: only overflow (flooding) control is considered; earlier threshold-based
    # and HC-baseline reward variants were dropped in favor of the GA benchmark.
    # The GA runs once per rainfall event, and its best pump-control sequence is
    # simulated to extract the flooding at every step.
    reward_sum = 0
    for pool in self.pool_list:
        # the GA flooding series is the per-step benchmark for DDQN
        reward_sum += (self.GA_flooding_step[self.iten] - flooding) / (0.0001 + total_in)
    done = (self.iten == self.T - 2)
    return state, reward_sum, done, {}
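A hypothetical end-to-end driver tying reset and step together; env and agent are placeholders for the environment above and any of the agents in this repo, and rain_event stands for one rainfall series from the test set.

rain_event = testRainData[0]        # one rainfall series (placeholder)
state = env.reset(rain_event)       # the GA baseline is computed inside reset
done = False
total_reward = 0.0
while not done:
    a = agent.choose_action(state)  # a float pump-on probability in [0, 1]
    state, reward, done, info = env.step(a, rain_event)
    total_reward += reward
print(total_reward)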