def GA_get_flooding_step(action_seq, startflie, floodStepFile, date_time, pumps):
    """Replay a GA-generated pump schedule step by step and collect the
    flooding volume SWMM reports after each incremental simulation.

    Parameters:
        action_seq: 2-D array of pump actions, one row per control step.
        startflie: basename of the pristine start input file (copied fresh
            before every partial simulation).
        floodStepFile: basename of the scratch inp/rpt file pair used for
            the per-step simulations.
        date_time: list of step boundary times; slice [0:k] covers the
            first k control intervals.
        pumps: pump identifiers to schedule.

    Returns:
        List of flooding values, one per simulated horizon.  The first
        entry covers steps 0-1 together; each later entry extends the
        horizon by one step.
    """
    def _simulate_horizon(step_times):
        # Reset the scratch file from the start file, write the pump
        # schedule covering `step_times`, run SWMM, and pull the flooding
        # total out of the report file.
        change_rain.copy_result(floodStepFile + '.inp', startflie + '.inp')
        set_pump.set_pump(action_seq, step_times, pumps, floodStepFile + '.inp')
        simulation(floodStepFile + '.inp')
        _, flooding, _, _, _, _ = get_rpt.get_rpt(floodStepFile + '.rpt')
        return flooding

    action_step = []
    flooding_step = []
    for i in range(action_seq.shape[0]):
        if i == 1:
            # Step 1 was already covered by the i == 0 simulation.
            continue
        if i == 0:
            # The first simulation spans the first two control steps.
            action_step.extend([action_seq[0], action_seq[1]])
            horizon = date_time[0:2]
        else:
            action_step.append(action_seq[i])
            horizon = date_time[0:i + 1]
        flooding_step.append(_simulate_horizon(horizon))
    return flooding_step
Example #2
0
                for item in pump_list[pool]:
                    pumps.append(item)
                xs.append(x)
                y = 1 - a
                #y=np.mean(a)
                ys.append(y)

            #设置pump并模拟之后才有reward
            action_seq.append(action)

            #stime=date_time[i]
            etime = date_time[i + 1]
            copy_result(aiinfile_tem + '.inp', aiinfile + '.inp')
            set_datetime.set_date(sdate, edate, stime, etime,
                                  aiinfile_tem + '.inp')
            set_pump.set_pump(action_seq, date_time[1:i + 1], pumps,
                              aiinfile_tem + '.inp')

            simulation(aiinfile_tem + '.inp')
            #change_rain.copy_result('check'+str(i)+'.inp',infile+'.inp')
            #获取rpt内信息,产生新的action
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                aiinfile_tem + '.rpt')
            #在确定泵开关之前确定最末时刻(当前)前池水位,水位过低时不开启
            pool_d = get_output.depth(aiinfile_tem + '.out', pool_list,
                                      date_t[i] - i)

            rain_sum = sum(rain[date_t[i]:date_t[i + 1]]) / max(rain)

            for pool in pool_list:
                reward = 0
                if flooding / total_in >= 0.5:
Example #3
0
    def test(self):
        """Evaluate the trained policy over every test rainfall event.

        Restores the TensorFlow model from ./save/model.ckpt, then for each
        test event: patches the rain data into the SWMM input files, builds
        the GA baseline (strategy plus per-step flooding), and steps through
        the event choosing pump actions from the policy network.  The
        per-step reward is the flooding reduction relative to the GA
        baseline; rewards are written to reward<iten>.txt and the resulting
        inp/rpt files are archived under ./sim/test/result/.
        """
        # Run the simulation repeatedly, once per test rain event.
        saver = tf.train.Saver()
        saver.restore(self.sess, "./save/model.ckpt")
        etime = date_time[1]  # NOTE(review): bare date_time — elsewhere this method uses self.date_time; confirm both refer to the same schedule
        xs, ys, drs = [], [], []
        pumps = []
        for pool in self.pool_list:  # each pool is a forebay (front pool)
            for item in self.pump_list[pool]:
                pumps.append(item)

        for iten in range(len(self.testRainData)):
            reward_sum = 0

            # Reset the start and working input files from the pristine template.
            change_rain.copy_result(self.startfile + '.inp',
                                    'arg-original.inp')
            change_rain.copy_result(self.infile + '.inp', 'arg-original.inp')

            rainData = self.testRainData[iten]

            # Patch this event's rainfall into the start file, then do a first run.
            change_rain.change_rain(rainData, self.startfile + '.inp')
            self.simulation(self.infile + '.inp')

            change_rain.copy_result(
                self.GA_tem + '.inp', self.startfile +
                '.inp')  # copy the rain-patched start inp to the GA scratch file
            # GA comparison: generate the GA strategy and its per-step flooding,
            # which serve as the per-step baseline for the DDQN agent.
            self.GA_action_seq = GA_sim.GA_sim(self.GA_tem, self.GAfile,
                                               self.crossRate,
                                               self.mutationRate,
                                               self.lifeCount, self.date_time,
                                               self.pumps, self.GAStepNum)
            self.GA_flooding_step = GA_get_flooding_step.GA_get_flooding_step(
                self.GA_action_seq, self.GA_tem, self.GAStepfile,
                self.date_time, self.pumps)

            begin = datetime.datetime.now()

            # Archive this event's start inp/rpt under result/en.
            change_rain.copy_result(
                './sim/test/result/en/inp/' + str(iten) + '.inp',
                self.startfile + '.inp')
            change_rain.copy_result(
                './sim/test/result/en/rpt/' + str(iten) + '.rpt',
                self.startfile + '.rpt')

            # Set the simulation window for the first step.
            set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                                  self.startfile + '.inp')

            change_rain.copy_result(self.infile + '.inp',
                                    self.startfile + '.inp')

            self.simulation(self.infile + '.inp')
            # Read totals from the rpt to build the first observation.
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                self.infile + '.rpt')
            # Forebay depth at the latest (current) time; pumps are not started
            # when the water level is too low.
            self.pool_d = get_output.depth(self.infile + '.out',
                                           self.pool_list, self.date_t[1])

            action_seq = []
            log_reward = ''

            for i in range(1,
                           len(self.date_t) -
                           1):  # iterate over the control steps of this event
                # Rainfall over the coming interval, normalized by the event peak.
                rain_sum = sum(
                    rainData[self.date_t[i]:self.date_t[i +
                                                        1]]) / max(rainData)

                action = []

                for pool in self.pool_list:
                    # Observation: normalized outflow/flooding/storage,
                    # forebay depth, and upcoming rainfall.
                    observation = [
                        outflow / total_in, flooding / total_in,
                        store / total_in, self.pool_d[pool], rain_sum
                    ]
                    x_in = observation
                    x = np.reshape(x_in, [1, self.D])
                    tfprob = self.sess.run(self.probability,
                                           feed_dict={self.observations: x})

                    # flage encodes the level band: 0 normal, -1 below the
                    # minimum pumping level, 1 above the maximum level.
                    flage = 0

                    if self.pool_d[pool] > (self.limit_level[pool][
                            0]) and self.pool_d[pool] < (self.limit_level[
                                pool][2]):  # within the normal operating band
                        flage = 0

                    elif self.pool_d[pool] < (self.limit_level[pool][0]
                                              ):  # below the minimum pumping level
                        flage = -1
                    else:
                        flage = 1

                    # Pump start/stop policy.
                    if flage == 0:
                        if tfprob < self.action_space[1] + 0.1:  # low probability: no pump
                            action.append(0)
                            action.append(0)
                            a = 0
                        elif tfprob >= self.action_space[
                                1] + 0.1 and tfprob < self.action_space[
                                    1] + 0.6:  # mid probability: one pump on
                            action.append(0)
                            action.append(1)
                            a = 1
                        else:  # otherwise: both pumps on
                            action.append(1)
                            action.append(1)
                            a = 1
                    elif flage == -1:
                        action.append(0)
                        action.append(0)
                        a = 0
                    else:  # flage == 1: above the maximum level, all pumps on
                        action.append(1)
                        action.append(1)
                        a = 1

                    # NOTE(review): xs/ys mirror the training bookkeeping but
                    # appear unused during testing.
                    xs.append(x)
                    y = 1 - a
                    ys.append(y)

                # The reward only exists after the pumps are set and simulated.
                action_seq.append(action)

                #stime=date_time[i]
                etime = self.date_time[i + 1]
                set_datetime.set_date(self.sdate, self.edate, self.stime,
                                      etime, self.startfile + '.inp')
                change_rain.copy_result(self.infile + '.inp',
                                        self.startfile + '.inp')
                set_pump.set_pump(action_seq, self.date_time[1:i + 1], pumps,
                                  self.infile + '.inp')

                self.simulation(self.infile + '.inp')
                #change_rain.copy_result('check'+str(i)+'.inp',infile+'.inp')
                # Read rpt info to produce the next observation.
                total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                    self.infile + '.rpt')
                # Forebay depth before deciding the next pump switch.
                self.pool_d = get_output.depth(self.infile + '.out',
                                               self.pool_list, date_t[i] - i)  # NOTE(review): bare date_t — elsewhere self.date_t; confirm

                # Reward: only overflow (flooding) control is considered.
                for pool in self.pool_list:
                    reward_sum += (self.GA_flooding_step[i] - flooding) / (
                        0.0001 + total_in)  # GA flooding is the per-step baseline
                    drs.append((self.GA_flooding_step[i] - flooding) /
                               (0.0001 + total_in))

                log_reward += str(reward_sum) + '\n'

            end = datetime.datetime.now()
            print(iten, '  ', end - begin)
            f = open('reward' + str(iten) + '.txt', 'w')  # NOTE(review): handle never closed — prefer a with-block
            f.write(log_reward)
            # Archive the resulting inp and rpt files.
            change_rain.copy_result(
                './sim/test/result/ai/inp/' + str(iten) + '.inp',
                './sim/test/oti.inp')
            change_rain.copy_result(
                './sim/test/result/ai/rpt/' + str(iten) + '.rpt',
                './sim/test/oti.rpt')
            print("操控序列:", action_seq)
            print("得分:", reward_sum)
Example #4
0
    def train(self):
        """Train the policy-gradient agent on every training rainfall event.

        For each event: patch the rain data into the SWMM input files, build
        the GA baseline (strategy plus per-step flooding), then simulate the
        event step by step collecting (observation, fake label, reward)
        triples.  Gradients are accumulated per episode and applied every
        self.batch_size episodes; the model is saved at the end.

        Returns:
            drs: reward list left over from the final episode batch.
        """
        xs, ys, drs = [], [], []
        rendering = False  # NOTE(review): unused in this method
        init = tf.global_variables_initializer()
        self.sess.run(init)
        gradBuffer = self.sess.run(self.tvars)

        # Zero the gradient accumulation buffer.
        for ix, grad in enumerate(gradBuffer):
            gradBuffer[ix] = grad * 0

        etime = date_time[1]  # NOTE(review): bare date_time — confirm it matches self.date_time
        episode_number = 0
        print(len(self.trainRainData))

        pumps = []

        for pool in self.pool_list:  # each pool is a forebay (front pool)
            for item in self.pump_list[pool]:
                pumps.append(item)

        while episode_number < len(self.trainRainData):
            reward_sum = 0

            rainData = self.trainRainData[episode_number]

            s1 = datetime.datetime.now()

            change_rain.copy_result(
                self.startfile + '.inp', 'arg-original.inp'
            )  # copy the pristine input arg-original.inp to the start file; each reset re-patches its rain data
            change_rain.copy_result(
                self.infile + '.inp',
                'arg-original.inp')  # infile is the working input used for every simulation

            change_rain.change_rain(rainData, self.startfile +
                                    '.inp')  # patch this event's rain data into the start inp file
            #print(A,C,P,b,n,R)

            change_rain.copy_result(self.GA_tem + '.inp', self.startfile +
                                    '.inp')  # copy the rain-patched input to the GA scratch file
            # GA comparison: generate the GA strategy and its per-step flooding,
            # which serve as the per-step baseline for the DDQN agent.
            self.GA_action_seq = GA_sim.GA_sim(self.GA_tem, self.GAfile,
                                               self.crossRate,
                                               self.mutationRate,
                                               self.lifeCount, self.date_time,
                                               pumps, self.GAStepNum)
            self.GA_flooding_step = GA_get_flooding_step.GA_get_flooding_step(
                self.GA_action_seq, self.GA_tem, self.GAStepfile,
                self.date_time, pumps)

            # First simulate the initial interval (10 min).
            set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                                  self.startfile +
                                  '.inp')  # set the time window in the start input file

            change_rain.copy_result(self.infile + '.inp', self.startfile +
                                    '.inp')  # copy the prepared start inp to the working infile
            self.simulation(self.infile + '.inp')

            # Read totals from the rpt to build the first observation.
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                self.infile + '.rpt')

            self.pool_d = get_output.depth(self.infile + '.out',
                                           self.pool_list, self.date_t[1])

            action_seq = []

            for i in range(1, len(self.date_t) - 1):  # iterate over every control step of this event
                # Rainfall over the coming interval, normalized by the event peak.
                rain_sum = sum(
                    rainData[self.date_t[i]:self.date_t[i +
                                                        1]]) / max(rainData)

                action = []
                for pool in self.pool_list:  # each pool is a forebay

                    # Observation: normalized outflow/flooding/storage,
                    # forebay depth, and upcoming rainfall.
                    observation = [
                        outflow / (0.001 + total_in),
                        flooding / (0.001 + total_in),
                        store / (0.001 + total_in), self.pool_d[pool], rain_sum
                    ]
                    x_in = observation
                    x = np.reshape(x_in, [1, self.D])
                    tfprob = self.sess.run(self.probability,
                                           feed_dict={self.observations:
                                                      x})  # pump on/off probability

                    # flage encodes the level band: 0 normal, -1 below the
                    # minimum pumping level, 1 above the maximum level.
                    flage = 0

                    if self.pool_d[pool] > (self.limit_level[pool][0]
                                            ) and self.pool_d[pool] < (
                                                self.limit_level[pool][2]
                                            ):  # within the normal band: above min, below max
                        flage = 0

                    elif self.pool_d[pool] < (self.limit_level[pool][0]
                                              ):  # below the minimum pumping level
                        flage = -1
                    else:  # otherwise above the maximum level
                        flage = 1

                    # Pump start/stop policy.
                    if flage == 0:
                        if tfprob < self.action_space[1] + 0.1:
                            # Probability below 0.1: keep both pumps off.
                            action.append(0)
                            action.append(0)
                            a = 0
                        elif tfprob >= self.action_space[
                                1] + 0.1 and tfprob < self.action_space[
                                    1] + 0.6:
                            # Probability in [0.1, 0.6): run one pump.
                            action.append(0)
                            action.append(1)
                            a = 1
                        else:  # otherwise: both pumps on
                            action.append(1)
                            action.append(1)
                            a = 1
                    elif flage == -1:  # below minimum level: no pumping
                        action.append(0)
                        action.append(0)
                        a = 0
                    else:  # flage == 1: above the maximum level, all pumps on
                        action.append(1)
                        action.append(1)
                        a = 1

                    # Record the observation and the fake label (1 - a) used
                    # by the policy-gradient loss.
                    xs.append(x)
                    y = 1 - a
                    ys.append(y)

                # Append the actions for all pumps at this step.
                action_seq.append(action)
                #print(action_seq)

                #stime=date_time[i]
                etime = date_time[i + 1]
                set_datetime.set_date(self.sdate, self.edate, self.stime,
                                      etime, self.startfile + '.inp')
                change_rain.copy_result(self.infile + '.inp',
                                        self.startfile + '.inp')
                set_pump.set_pump(action_seq, date_time[1:i + 1], pumps,
                                  self.infile + '.inp')

                self.simulation(self.infile + '.inp')

                # Read the current flooding from the rpt to produce the next observation.
                total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                    self.infile + '.rpt')
                # Forebay depth before deciding the next pump switch.
                self.pool_d = get_output.depth(self.infile + '.out',
                                               self.pool_list,
                                               self.date_t[i] - i)

                # Reward: only overflow (flooding) control is considered.

                for pool in self.pool_list:
                    reward_sum += (self.GA_flooding_step[i] - flooding) / (
                        0.0001 + total_in)  # GA flooding is the per-step baseline
                    drs.append((self.GA_flooding_step[i] - flooding) /
                               (0.0001 + total_in))

            episode_number += 1
            # One rain event finished: update the agent.
            # Stack this episode's observations, labels and rewards, then
            # compute discounted, normalized advantages.
            epx = np.vstack(xs)
            epy = np.vstack(ys)
            epr = np.vstack(drs)
            xs, ys, drs = [], [], []
            discounted_epr = self.discount_reward(epr)
            discounted_epr -= np.mean(discounted_epr)
            discounted_epr /= np.std(discounted_epr)  # NOTE(review): std may be 0 for constant rewards

            tGrad = self.sess.run(self.newGrads,
                                  feed_dict={
                                      self.observations: epx,
                                      self.input_y: epy,
                                      self.advantages: discounted_epr
                                  })
            for ix, grad in enumerate(tGrad):
                gradBuffer[ix] += grad

            # Once a full batch of episodes is accumulated, apply the gradients.
            if episode_number % self.batch_size == 0:
                #print("train")
                self.sess.run(self.updateGrads,
                              feed_dict={
                                  self.W1Grad: gradBuffer[0],
                                  self.W2Grad: gradBuffer[1]
                              })
                print('Average reward for %d:%f.' %
                      (episode_number, reward_sum / self.batch_size))  # NOTE(review): reward_sum is reset each episode, so this is the last episode's sum / batch_size
                #reward_sum=0
                for ix, grad in enumerate(gradBuffer):
                    gradBuffer[ix] = grad * 0

                #if abs(old_reward-reward_sum/self.batch_size)/abs(old_reward)<=1e-15:
                #print("Task soveld in", episode_number)
                #break
                #old_reward=reward_sum/self.batch_size

            #observation=env.reset()

            s2 = datetime.datetime.now()
            print(s2 - s1)
        print("training done")
        saver = tf.train.Saver()
        sp = saver.save(self.sess, "./save/model.ckpt")
        print("model saved:", sp)
        return drs
    def step(self,a,raindata):
        """Advance the environment by one control interval.

        Translates the pump-on probability `a` into per-pump on/off actions
        (gated by forebay water levels), writes the schedule into the SWMM
        input file, runs the simulation up to the next timestep, and builds
        the next observation and reward.

        Parameters:
            a: scalar pump-on probability emitted by the agent.
            raindata: full rainfall series for the current event.

        Returns:
            (state, reward_sum, done, {}) in the gym step convention, where
            state is the observation np.array for the last pool iterated,
            reward_sum is the flooding improvement versus the GA baseline,
            and done is True on the episode's final step.
        """
        # Advance the internal step pointer.
        self.iten+=1
        action=[]
        pumps=[]

        for pool in self.pool_list:
            # flage encodes the level band: 0 normal, -1 below the minimum
            # pumping level, 1 above the maximum level.
            flage=0
            if self.pool_d[pool]>(self.limit_level[pool][0]) and self.pool_d[pool]<(self.limit_level[pool][2]):
                flage=0
            elif self.pool_d[pool]<(self.limit_level[pool][0]):
                flage=-1
            else:
                flage=1

            # Pump start/stop policy driven by the action probability.
            if flage==0:
                if a<self.action_space[1]+0.1:
                    # Low probability: keep both pumps off.
                    action.append(0)
                    action.append(0)
                elif a>=self.action_space[1]+0.1 and a<self.action_space[1]+0.6:
                    # Mid probability: run one pump.
                    action.append(0)
                    action.append(1)
                else:
                    # High probability: run both pumps.
                    action.append(1)
                    action.append(1)
            elif flage==-1:
                # Water too low: force both pumps off.
                action.append(0)
                action.append(0)
            else:
                # Water above the maximum level: force both pumps on.
                action.append(1)
                action.append(1)

            for item in self.pump_list[pool]:
                pumps.append(item)

            # The reward only exists after the pumps are set and simulated.
            self.action_seq.append(action)

        set_pump.set_pump(self.action_seq,self.date_time[1:self.iten],pumps,self.orftem+'.inp')

        tem_etime=self.date_time[self.iten]
        set_datetime.set_date(self.sdate,self.edate,self.stime,tem_etime,self.orftem+'.inp')

        # Refresh the SWMM scratch inp files: stage the scheduled input,
        # then restore the template to its rain-only state.
        change_rain.copy_result(self.staf+'.inp',self.orftem+'.inp')
        change_rain.copy_result(self.orftem+'.inp',self.orf_rain+'.inp')

        # Step the simulation forward.  (A second identical simulation call
        # after the state loop was redundant and has been removed.)
        self.simulation(self.staf+'.inp')

        # Read the state from the rpt/out files.
        total_in,flooding,store,outflow,upflow,downflow=get_rpt.get_rpt(self.staf+'.rpt')
        # Forebay depth at the current (latest) time; pumps are not started
        # when the level is too low.
        self.pool_d=get_output.depth(self.staf+'.out',self.pool_list,self.date_t[self.iten]-self.iten)
        rain_sum=sum(raindata[self.date_t[self.iten]:self.date_t[self.iten+1]])/max(raindata)

        for pool in self.pool_list:
            state=np.array([outflow/(0.001+total_in),flooding/(0.001+total_in),store/(0.001+total_in),self.pool_d[pool],rain_sum])

        # Reward: flooding reduction relative to the GA baseline for this
        # step (the GA per-step flooding is the DDQN benchmark).
        reward_sum=0
        for pool in self.pool_list:
            reward_sum+=(self.GA_flooding_step[self.iten]-flooding)/(0.0001+total_in)

        if self.iten==self.T-2:
            done=True
        else:
            done=False

        return state,reward_sum,done,{}
Example #6
0
def GA_sim(startfile, simfile, crossRate, mutationRate, lifeCount, date_time,
           pumps, stepNum):
    """Search for a pump-control schedule with a genetic algorithm.

    Each individual is a flat gene of length len(date_time) * len(pumps),
    reshaped into a (steps x pumps) action matrix.  Fitness is 1/(1+flooding)
    from a full SWMM run of the schedule.  Runs `stepNum` generations with
    elitism (the best individual is carried over), then re-simulates the best
    schedule and archives its inp/rpt files.

    Returns:
        The best action matrix found, shape (len(date_time), len(pumps)).
    """
    iten = 0
    iten += 1  # NOTE(review): iten is always 1, so the archive below always writes GA_1.* and overwrites previous runs

    change_rain.copy_result(simfile + '.inp',
                            startfile + '.inp')  # copy the rain-patched start inp to the simulation file

    action_seq = []
    t_reward = []
    begin = datetime.datetime.now()
    # Use the genetic algorithm to produce a 2-D control matrix action_seq.

    # Initialization: random population, one fitness evaluation per individual.
    lives = initPopulation(lifeCount, len(date_time) * len(pumps))
    scores = []
    bounds = 0
    generation = 0

    for gene in lives:

        tem = np.array(gene)

        action_seq = list(tem.reshape(len(date_time), len(pumps)))  # e.g. a 25x8 matrix (steps x pumps)
        #print(action_seq)
        change_rain.copy_result(simfile + '.inp',
                                startfile + '.inp')  # reset the simulation inp from the start file

        set_pump.set_pump(action_seq, date_time[0:len(date_time) - 1], pumps,
                          simfile + '.inp')
        simulation(simfile + '.inp')
        #change_rain.copy_result('check'+str(i)+'.inp',infile+'.inp')
        # Read the flooding total from the rpt to score this individual.
        total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
            simfile + '.rpt')
        scores.append(1 / (1 + flooding))

        score = 1 / (1 + flooding)
        bounds += score
    best = lives[scores.index(max(scores))]
    #print(best)
    # End of initialization.

    begin = datetime.datetime.now()

    for i in range(stepNum):

        # Evolve: build the next generation and evaluate each individual.
        newLives = []
        newLives.append(best)  # elitism: carry the best individual into the next generation
        while len(newLives) < lifeCount:
            newLives.append(
                newChild(crossRate, mutationRate, lives, scores, bounds))
        lives = newLives
        generation += 1

        scores = []
        bounds = 0
        #print('step'+str(i))
        for gene in lives:
            #print(action_seq)
            tem = np.array(gene)
            action_seq = list(tem.reshape(len(date_time), len(pumps)))

            change_rain.copy_result(simfile + '.inp', startfile + '.inp')
            set_pump.set_pump(action_seq, date_time[0:len(date_time) - 1],
                              pumps, simfile + '.inp')
            simulation(simfile + '.inp')
            #change_rain.copy_result('check'+str(i)+'.inp',infile+'.inp')
            # Read the flooding total from the rpt to score this individual.
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                simfile + '.rpt')
            score = 1 / (1 + flooding)
            scores.append(score)

            bounds += score
        best = lives[scores.index(max(scores))]
        max_scors = max(scores)  # NOTE(review): unused (and a typo for "max_scores")
        end = datetime.datetime.now()
        #print(i,'  ',end-begin)

    # Re-simulate the best strategy to leave its results in simfile's rpt.
    tem = np.array(best)
    action_seq = tem.reshape(len(date_time), len(pumps))

    change_rain.copy_result(simfile + '.inp', startfile + '.inp')
    set_pump.set_pump(action_seq, date_time[0:len(date_time) - 1], pumps,
                      simfile + '.inp')
    simulation(simfile + '.inp')
    total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
        simfile + '.rpt')
    score = 1 / (1 + flooding)

    end = datetime.datetime.now()
    #print('search done, time: ',end-begin)

    # Archive the GA inp and rpt files used for this run.
    #if(trainBool==True):

    copy_result('./sim/GA/GA_' + str(iten) + '.inp',
                simfile + '.inp')  # store the GA inp/rpt pair of each simulation
    copy_result('./sim/GA/GA_' + str(iten) + '.rpt', simfile + '.rpt')
    #   if(testBool==True):
    #        copy_result('./test_result/GA/GA_'+str(iten)+'.inp',simfile+'.inp')
    #        copy_result('./test_result/GA/GA_'+str(iten)+'.rpt',simfile+'.rpt')
    #print("操控序列:",action_seq.tolist())
    #print("得分:",reward_sum)

    #np.savetxt('./sim/GAActionSeq.txt',action_seq,fmt='%f',delimiter=',')
    return action_seq
    def step(self, a, rainData):  # simulate the rainfall data one step at a time
        """Advance the environment by one control interval.

        Translates the pump-on probability `a` into per-pump on/off actions
        (gated by forebay water levels), writes the schedule into the SWMM
        input file, advances the simulation to the next timestep, and builds
        the next observation and reward.

        Returns:
            (state, reward_sum, done, {}) in the gym step convention.
        """
        # Advance statf's date according to iten, apply the action, then
        # simulate and store the results.
        action = []
        pumps = []

        self.iten += 1

        #print('a=',a)
        for pool in self.pool_list:
            # Check the water level; it gates whether pumps may run.
            flage = 0
            if self.pool_d[pool] > (
                    self.limit_level[pool][0]) and self.pool_d[pool] < (
                        self.limit_level[pool][2]):  # within the normal band: above min, below max
                flage = 0
            elif self.pool_d[pool] < (
                    self.limit_level[pool][0]):  # below the minimum pumping level
                flage = -1
            else:
                flage = 1

            # Pump start/stop policy.
            if flage == 0:
                if a < self.action_space[1] + 0.1:  # low probability: no pump
                    action.append(0)
                    action.append(0)
                elif a >= self.action_space[1] + 0.1 and a < self.action_space[
                        1] + 0.6:  # mid probability: one pump on
                    action.append(0)
                    action.append(1)
                else:  # otherwise: both pumps on
                    action.append(1)
                    action.append(1)
            elif flage == -1:
                action.append(0)
                action.append(0)
            else:  # flage == 1: all pumps on
                action.append(1)
                action.append(1)

            for item in self.pump_list[pool]:
                pumps.append(item)

            # The reward only exists after the pumps are set and simulated.
            self.action_seq.append(action)
            #print(self.action_seq)

        set_pump.set_pump(self.action_seq, self.date_time[1:self.iten], pumps,
                          self.orftem + '.inp')  # write the pump schedule into orftem's inp
        #print(pumps)

        tem_etime = self.date_time[self.iten]
        set_datetime.set_date(self.sdate, self.edate, self.stime, tem_etime,
                              self.orftem +
                              '.inp')  # after writing the schedule, set the time window in orftem's inp
        #change_rain.copy_result(infile+'.inp',startfile+'.inp')

        change_rain.copy_result(self.staf + '.inp', self.orftem +
                                '.inp')  # stage orftem's inp as the staf simulation input
        change_rain.copy_result(self.orftem + '.inp', self.orf_rain +
                                '.inp')  # restore orftem to the rain-only version

        self.simulation(self.staf + '.inp')

        # Read the state from the out and rpt files.
        # When iten reaches the final time step, the simulation stops.
        total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
            self.staf + '.rpt')
        # Forebay depth at the current (latest) time; pumps are not started
        # when the level is too low.
        self.pool_d = get_output.depth(self.staf + '.out', self.pool_list,
                                       self.date_t[self.iten] - self.iten)

        rain_sum = sum(
            rainData[self.date_t[self.iten]:self.date_t[self.iten +
                                                        1]]) / max(rainData)

        for pool in self.pool_list:
            state = np.array([
                outflow / (0.001 + total_in), flooding / (0.001 + total_in),
                store / (0.001 + total_in), self.pool_d[pool], rain_sum
            ])

        # Reward: only overflow (flooding) control is considered.
        # The GA runs per rain event; its final control sequence is simulated
        # and the per-step flooding extracted as the baseline.

        #change_rain.copy_result('compare_tem_HC.inp',self.orf_rain+'.inp')#restore
        #set_datetime.set_date(self.sdate,self.edate,self.stime,tem_etime,'compare_tem_HC.inp')
        #self.simulation(self.staf+'.inp')  # duplicate call? left disabled

        #try different reward
        #change_rain.copy_result('compare_tem_HC.inp',self.orf_rain+'.inp')
        #set_datetime.set_date(self.sdate,self.edate,self.stime,tem_etime,'compare_tem_HC.inp')
        # baseline comparison used by reward2


#        self.simulation('compare_tem_HC.inp')
#        _,flooding_compare,_,_,_,_=get_rpt.get_rpt('compare_tem_HC.rpt')
        '''
        #reward1
        reward_sum=0
        for pool in self.pool_list:
            if flooding>total_in*0.1:
                reward_sum+=-1.0
            else:
                reward_sum+=1.0
        '''
        '''
        #reward2
        reward_sum=0
        for pool in self.pool_list:
            if flooding_compare!=0.0:
                reward_sum+=(flooding_compare-flooding)/flooding_compare
            else:
                reward_sum+=-flooding/(0.0001+total_in)
        '''
        # Compare against the flooding computed by the GA baseline.
        reward_sum = 0
        for pool in self.pool_list:
            reward_sum += (self.GA_flooding_step[self.iten] - flooding) / (
                0.0001 + total_in)  # GA flooding is the per-step baseline for DDQN

        if self.iten == self.T - 2:
            done = True
        else:
            done = False

        return state, reward_sum, done, {}