Ejemplo n.º 1
0
    def run_step(self, actions=None):
        """Advance the environment by one step and score the resulting case.

        The step counter is incremented, the optional actions are applied,
        the case is written to this step's directory together with the
        'LF.L0' and 'ST.S0' files from self.base_path, and call_wmlf is run
        on that directory.

        :param actions: str. 'random' perturbs generators and loads at random
                        (for testing only);
                        or dict. Action set, applied through self.load_action;
                        or None. Keep the current data unchanged.
        :return: (state, float, bool). State, reward and done flag. The state
                 is an empty list when check_lfcal reports failure.
        """
        self.step += 1
        step_dir = self.get_ep_path(step=self.step)
        if actions == 'random':  # just for test
            distribute_generators_p(self.power.data['generator'], 1., sigma=0.1)
            distribute_loads_p(self.power.data['load'], 1., p_sigma=0.1,
                               keep_factor=True, factor_sigma=0.1)
        elif actions is not None:
            self.load_action(actions)

        # Start from a clean step directory, then write the case and the
        # two control files before running the calculation.
        shutil.rmtree(step_dir, ignore_errors=True)
        self.power.save_power(step_dir, self.fmt, lf=True, lp=False, st=True)
        for control_file in ('LF.L0', 'ST.S0'):
            shutil.copy(os.path.join(self.base_path, control_file), step_dir)
        call_wmlf(step_dir)

        if not check_lfcal(step_dir):
            # Failed check: terminal step with the fixed penalty as assessment.
            state = []
            assess = PF_NOTCONV_REWARD
            done = True
        else:
            # Replace the 'lp' data with the freshly computed results.
            self.power.drop_data(self.fmt, 'lp')
            self.power.load_power(step_dir, self.fmt, lf=False, lp=True, st=False)
            generators = self.power.data['generator']
            generators['p0'] = generators['p']
            if self.step == 0:
                self.load_init_info()
            state = self.get_state()
            if os.name == 'nt':
                # On Windows the assessment is a random placeholder
                # (presumably call_psa is unavailable there - TODO confirm).
                assess = np.random.rand()
                done = (assess < 0.1)
            else:
                call_psa(step_dir)
                assess, done, _ = self.make_assessment(step_dir, method='min', min_n=3)
        self.assessments.append(assess)

        # The initial step (step 0) never yields a reward.
        if self.step == 0:
            return state, 0., done

        reward = self.assessments[-1] - self.assessments[-2] + STEP_REWARD
        if done:
            # A terminal step is rewarded with the raw assessment.
            return state, assess, done

        # End the episode once the marked load total has drifted too far
        # (relative to self.init_load_p).
        loads = self.power.data['load']
        load_p = np.sum(loads.loc[loads['mark'] == 1, 'p0'])
        if abs(load_p - self.init_load_p) / self.init_load_p >= LOAD_CHANGE_THR:
            reward += PF_LOADFULL_REWARD
            done = True
        return state, reward, done
Ejemplo n.º 2
0
def run_tau(ppo, env, policy_d='ppo', policy_g='ppo', greedy=False,
            lock_d=False, lock_g=False, def_actions_d=None):
    """Roll out one trajectory that alternates load and generator steps.

    Each round performs a load ('d') step followed by a generator ('g')
    step; every step calls env.run_step() and stores one record
    ``[state, value, action, reward, discounted_return]``.

    :param ppo: agent providing choose_action(state, kind, greedy=...) and
                get_v(state).
    :param env: environment providing get_state, load_action, run_step and
                power.data.
    :param policy_d: str. 'ppo' asks the agent for the load action; 'def'
                     takes def_actions_d[i]; otherwise None is applied.
    :param policy_g: str. 'ppo' asks the agent for the generator action;
                     'nop' also skips the generator adjustment that follows
                     the load action.
    :param greedy: bool. Forwarded to ppo.choose_action.
    :param lock_d: bool. Skip the load action entirely.
    :param lock_g: bool. Skip the generator action entirely.
    :param def_actions_d: sequence of predefined load actions or None; when
                          non-empty it also caps the number of rounds.
    :return: (list, list). Records of the load steps (even positions) and of
             the generator steps (odd positions).
    """
    tau = []
    n_rounds = min(len(def_actions_d), TAU_LEN) if def_actions_d else TAU_LEN
    for k in range(n_rounds):
        # L step
        act_d = None
        state = env.get_state()
        if not lock_d:
            valid = (env.power.data['load']['mark'] == 1)
            load_p0 = np.sum(env.power.data['load'].loc[valid, 'p0'])
            if policy_d == 'ppo':
                act_d = ppo.choose_action(state, 'd', greedy=greedy)
            elif policy_d == 'def':
                act_d = def_actions_d[k]
            env.load_action({'load_ratio_p': act_d})
            if policy_g != 'nop':
                # Pass the change in marked load p0 on to the generators.
                load_p1 = np.sum(env.power.data['load'].loc[valid, 'p0'])
                distribute_generators_p(env.power.data['generator'],
                                        load_p1 - load_p0)
        next_state, r, done = env.run_step()
        value = ppo.get_v(state).numpy()
        tau.append([state, value, act_d, r, 0.])
        if done:
            break

        # G step
        state = next_state
        act_g = None
        if not lock_g:
            valid = (env.power.data['generator']['mark'] == 1)
            gen_p0 = np.sum(env.power.data['generator'].loc[valid, 'p0'])
            if policy_g == 'ppo':
                act_g = ppo.choose_action(state, 'g', greedy=greedy)
                env.load_action({'generator_ratio_p': act_g})
                # Hand the change in marked generator p0 back with the
                # opposite sign.
                gen_p1 = np.sum(env.power.data['generator'].loc[valid, 'p0'])
                distribute_generators_p(env.power.data['generator'],
                                        -(gen_p1 - gen_p0))
        next_state, r, done = env.run_step()
        value = ppo.get_v(state).numpy()
        tau.append([state, value, act_g, r, 0.])
        if done:
            break

    # Bootstrap from the value of the last state unless the episode ended,
    # then accumulate discounted returns from the last record backwards.
    if done:
        v_s_ = 0.
    else:
        v_s_ = ppo.get_v(next_state.astype(np.float32)).numpy()
    for record in reversed(tau):
        v_s_ = record[3] + GAMMA * v_s_
        record[4] = v_s_
    return tau[0::2], tau[1::2]
Ejemplo n.º 3
0
 def random_generate(base_path, fmt, size, out_path,
                     min_p=None, max_p=None, gl_ratio=0.9,
                     random_q0=True, random_open=False, open_prob=(0.8,)):
     """Generate ``size`` randomised cases under out_path and run each one.

     out_path is removed and recreated. Every case starts from the data
     loaded from base_path, is saved to ``out_path/<8-digit index>``
     together with the 'LF.L0' and 'ST.S0' files of base_path, and is then
     passed to call_wmlf. A summary line with the number of cases accepted
     by check_lfcal is printed at the end.

     :param base_path: str. Directory of the base case.
     :param fmt: str. Data format, forwarded to Power.
     :param size: int. Number of cases to generate.
     :param out_path: str. Output directory (deleted first if present).
     :param min_p: float. Lower bound of the random generation total;
                   or None. Use the sum of the generators' 'pmin'.
     :param max_p: float. Upper bound of the random generation total;
                   or None. Use the sum of the generators' 'pmax'.
     :param gl_ratio: float. Load total is steered towards
                      gl_ratio * generation total.
     :param random_q0: bool. Whether to call random_load_q0 on the loads.
     :param random_open: bool. Whether to call random_open_acline.
     :param open_prob: sequence of float. Thresholds compared with a single
                       uniform draw; the count of thresholds exceeded is the
                       ``num`` passed to random_open_acline.
     :return: None.
     """
     power = Power(fmt=fmt)
     power.load_power(base_path, fmt=fmt)
     generators_bak = power.data['generator'].copy()
     loads_bak = power.data['load'].copy()
     if random_open:
         aclines_bak = power.data['acline'].copy()
     # Compare against None so that an explicit bound of 0 is honoured
     # instead of being silently replaced by the default.
     if min_p is None:
         min_p = np.sum(generators_bak['pmin'])
     if max_p is None:
         max_p = np.sum(generators_bak['pmax'])
     p0 = np.sum(generators_bak['p0'])
     open_thresholds = np.asarray(open_prob)
     shutil.rmtree(out_path, ignore_errors=True)
     os.mkdir(out_path)
     conv_count = 0
     for i in range(size):
         # Every case starts from a fresh copy of the base data.
         generators = power.data['generator'] = generators_bak.copy()
         loads = power.data['load'] = loads_bak.copy()
         if random_open:
             power.data['acline'] = aclines_bak.copy()
         p = min_p + (max_p - min_p) * np.random.rand()
         distribute_generators_p(generators, p - p0, sigma=0.2)
         gen_p = np.sum(generators['p0'])
         load_p = np.sum(loads['p0'])
         distribute_loads_p(loads, gl_ratio * gen_p - load_p,
                            p_sigma=0.2, keep_factor=False)
         if random_q0:
             random_load_q0(loads, sigma=None)
         if random_open:
             open_num = np.sum(np.random.rand(1) > open_thresholds)
             random_open_acline(power, num=open_num)
         path = os.path.join(out_path, '%08d' % i)
         power.save_power(path, fmt, lf=True, lp=False, st=True)
         shutil.copy(os.path.join(base_path, 'LF.L0'), path)
         shutil.copy(os.path.join(base_path, 'ST.S0'), path)
         call_wmlf(path)
         if check_lfcal(path):
             conv_count += 1
     print('Random generate done: %d / %d' % (conv_count, size))
Ejemplo n.º 4
0
def run_episode(ppo,
                env,
                policy_d='ppo',
                policy_g='ppo',
                greedy=False,
                def_actions_d=None):
    """Run one episode, applying a load action and a generator action per step.

    :param ppo: agent providing choose_action(state, kind, greedy=...).
    :param env: environment providing get_state, load_action, run_step and
                power.data.
    :param policy_d: str. 'ppo' asks the agent for the load action; 'def'
                     applies def_actions_d[i]; otherwise no load action.
    :param policy_g: str. 'ppo' asks the agent for the generator action,
                     which is handed to env.run_step; 'dis' passes the load
                     change to distribute_generators_p; otherwise nothing.
    :param greedy: bool. Forwarded to ppo.choose_action.
    :param def_actions_d: sequence of predefined load actions or None; when
                          non-empty it also caps the number of steps.
    :return: (done, last_state, states_d, states_g, actions_d, actions_g,
             rewards). The action lists only hold actions chosen by the agent.
    """
    def marked_load_p(frame):
        # Total 'p0' of the rows whose 'mark' equals 1.
        return np.sum(frame.loc[frame['mark'] == 1, 'p0'])

    states_d, states_g = [], []
    actions_d, actions_g = [], []
    rewards = []
    n_steps = min(len(def_actions_d), EP_LEN) if def_actions_d else EP_LEN
    for idx in range(n_steps):
        loads = env.power.data['load']
        generators = env.power.data['generator']
        s_d = env.get_state()
        p_before = marked_load_p(loads)
        if policy_d == 'ppo':
            act_d = ppo.choose_action(s_d, 'd', greedy=greedy)
            actions_d.append(act_d)
            env.load_action({'load_ratio_p': act_d})
        elif policy_d == 'def':
            env.load_action({'load_ratio_p': def_actions_d[idx]})
        s_g = env.get_state()
        delta = marked_load_p(loads) - p_before

        step_action = None
        if policy_g == 'ppo':
            act_g = ppo.choose_action(s_g, 'g', greedy=greedy)
            actions_g.append(act_g)
            step_action = {'generator_ratio_p': act_g}
        elif policy_g == 'dis':
            distribute_generators_p(generators, delta)

        s_, r, done = env.run_step(step_action)
        states_d.append(s_d)
        states_g.append(s_g)
        rewards.append(r)
        if done:
            break
    return done, s_, states_d, states_g, actions_d, actions_g, rewards
Ejemplo n.º 5
0
    def reset(self, random=True, load_path=None, error='raise'):
        """Reset the case for a new episode and evaluate the initial step.

        The episode counter is incremented, the episode directory is
        recreated, the case is loaded and (optionally) randomised, and
        run_step() is called once to produce self.state0.

        :param random: bool. Whether to randomise the initial case.
        :param load_path: str. Directory of the initial case;
                          or None. Use self.base_path as the initial case.
        :param error: str. 'raise' raises ValueError when the initial step
                      is already done; any other value only returns False.
        :return: bool. Whether the reset succeeded (not done).
        :raises ValueError: if the initial step is done and error == 'raise'.
        """
        load_path = self.base_path if load_path is None else load_path
        self.episode += 1
        self.min_max = None
        self.init_load_p = 0.
        path = self.get_ep_path()
        shutil.rmtree(path, ignore_errors=True)
        os.mkdir(path)
        # run_step() increments the counter, so the initial step is step 0.
        self.step = -1
        self.assessments = []
        self.power.load_power(load_path, fmt=self.fmt)
        if random:
            generators = self.power.data['generator']
            loads = self.power.data['load']
            generators['p0'] = generators['p']
            # Sum each column explicitly: np.sum(DataFrame) relies on the
            # axis=None reduction that pandas has deprecated and that will
            # return a single scalar, which would break this unpacking.
            max_p, p0 = generators[['pmax', 'p0']].sum(axis=0)
            p = max_p * (0.4 + 0.5 * np.random.rand())  # 40% ~ 90%
            distribute_generators_p(generators, p - p0, sigma=0.2)
            gen_p = np.sum(generators['p0'])
            load_p = np.sum(loads['p0'])
            # Steer the load total towards 90% of the generation total.
            distribute_loads_p(loads, 0.9 * gen_p - load_p, p_sigma=0.1, keep_factor=False)
            random_load_q0(loads, sigma=None)
        self.state0, _, done = self.run_step()
        if done and error == 'raise':
            raise ValueError('reset failed: initial step of episode %s '
                             'is already done' % self.episode)
        return not done
Ejemplo n.º 6
0
    def reset(self, random=True, load_path=None):
        """Reset the case for a new episode and evaluate the initial step.

        The case is loaded, the episode counter is incremented, the episode
        directory is recreated, the case is (optionally) randomised, and
        run_step() is called once to produce self.state0.

        :param random: bool. Whether to randomise the initial case.
        :param load_path: str. Directory of the initial case;
                          or None. Use self.base_path as the initial case.
        :return: bool. Whether the reset succeeded (not done).
        """
        if load_path is None:
            self.power.load_power(self.base_path, fmt=self.fmt)
        else:
            self.power.load_power(load_path, fmt=self.fmt)
        self.power.data['generator']['p0'] = self.power.data['generator']['p']
        self.episode += 1
        path = self.get_ep_path()
        if os.path.exists(path):
            shutil.rmtree(path)
        os.mkdir(path)
        # run_step() increments the counter, so the initial step is step 0.
        self.step = -1
        self.assessments = []
        if random:
            generators = self.power.data['generator']
            loads = self.power.data['load']
            # Sum each column explicitly: np.sum(DataFrame) relies on the
            # axis=None reduction that pandas has deprecated and that will
            # return a single scalar, which would break this unpacking.
            max_p, gen_p = generators[['pmax', 'p']].sum(axis=0)
            p = max_p * 0.4 + max_p * 0.5 * np.random.rand()  # 40% ~ 90%
            distribute_generators_p(generators, p - gen_p, sigma=0.2)
            # Keep every generator inside its own [pmin, pmax] range.
            generators['p0'] = np.clip(generators['p0'],
                                       generators['pmin'], generators['pmax'])
            gen_p = np.sum(generators['p0'])
            load_p = np.sum(loads['p'])
            # Steer the load total towards 90% of the generation total.
            distribute_loads_p(loads, 0.9 * gen_p - load_p, p_sigma=0.1, keep_factor=False)
            random_load_q0(loads, sigma=None)
        self.min_max = None
        self.init_load_p = 0.
        self.state0, _, done = self.run_step()
        return not done