Example #1
    def __init__(self, env):
        # =============== Init Observation Space =================
        self.env = env
        self.gym_obs_space = GymObservationSpace(self.env)
        self.all_obs = []
        obs = self.env.reset()
        gym_obs = self.gym_obs_space.to_gym(obs)
        for key in gym_obs.keys():  # record every available observation key
            self.all_obs.append(key)
        self.obs_list = [
            "prod_p", "prod_v", "load_p", "load_q", "rho", "topo_vect"
        ]
        init_obs = self.convert_obs(obs)
        #print("obs_shape = ", init_obs.shape) # (39,)

        # =============== Init Action Space =================
        self.converter = grid2op.Converter.IdToAct(self.env.action_space)
        # action number 0 = DoNothing
        self.converter.init_converter(
            set_line_status=False,  # 40 
            change_line_status=True,  # 8
            change_bus_vect=False,  # 59
            set_topo_vect=True,  # 58
        )
        self.gym_action_space = GymActionSpace(action_space=self.converter)
        ACT_SIZE = len(self.converter.all_actions)
        #print("action space size= ", ACT_SIZE) # 68
        #print("gym_action_space = ", self.gym_action_space) # Dict(action:Discrete(68))
        gym_action = self.gym_action_space.sample()
        #print("sample_action = ", gym_action) # OrderedDict([('action', 34)])
        encoded_action = self.gym_action_space.from_gym(gym_action)  # 34
        self.num_actions = ACT_SIZE
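
The fragment above assumes its imports from elsewhere in the file. Below is a minimal, self-contained sketch of the same setup; the import paths and environment name are assumptions (recent grid2op releases expose the gym wrappers in grid2op.gym_compat, older ones in grid2op.Converter):

import grid2op
from grid2op.Converter import IdToAct
from grid2op.gym_compat import GymActionSpace, GymObservationSpace

env = grid2op.make("l2rpn_case14_sandbox")  # any grid2op environment name

# discrete action ids <-> grid2op actions
converter = IdToAct(env.action_space)
converter.init_converter(change_line_status=True, set_topo_vect=True)

gym_act_space = GymActionSpace(action_space=converter)
gym_obs_space = GymObservationSpace(env)

gym_act = gym_act_space.sample()          # e.g. OrderedDict([('action', 34)])
act_id = gym_act_space.from_gym(gym_act)  # plain integer id
g2op_act = converter.convert_act(act_id)  # full grid2op action object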
Example #2
    def test_creation(self):
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with make("l2rpn_wcci_2020", test=True) as env:
                # test that the action space can be created and sampled
                converter = self.init_converter(env)
                act_space = GymActionSpace(converter)
                act_space.sample()
Example #3
    def test_json(self):
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with make("l2rpn_wcci_2020", test=True) as env:
                # test that both spaces can be created
                obs_space = GymObservationSpace(env)
                act_space = GymActionSpace(env.action_space)

                obs_space.seed(0)
                act_space.seed(0)

                self._aux_test_json(obs_space)
                self._aux_test_json(act_space)
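
The helper _aux_test_json is defined elsewhere in the test class. A plausible stand-in, assuming it only verifies that a (sampled) point survives a JSON round trip; this is a hypothetical reconstruction, not the actual grid2op test helper:

import json
import numpy as np

def _aux_test_json(space, sample=None):
    # hypothetical reconstruction of the helper used in these tests
    if sample is None:
        sample = space.sample()
    serializable = {k: np.asarray(v).tolist() for k, v in sample.items()}
    restored = json.loads(json.dumps(serializable))
    assert list(restored) == list(serializable)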
Example #4
class GridAgent():
    def __init__(self, env):
        # =============== Init Observation Space =================
        self.env = env
        self.gym_obs_space = GymObservationSpace(self.env)
        self.all_obs = []
        obs = self.env.reset()
        gym_obs = self.gym_obs_space.to_gym(obs)
        for key in gym_obs.keys():  # record every available observation key
            self.all_obs.append(key)
        self.obs_list = [
            "prod_p", "prod_v", "load_p", "load_q", "rho", "topo_vect"
        ]
        init_obs = self.convert_obs(obs)
        #print("obs_shape = ", init_obs.shape) # (39,)

        # =============== Init Action Space =================
        self.converter = grid2op.Converter.IdToAct(self.env.action_space)
        # action number 0 = DoNothing
        self.converter.init_converter(
            set_line_status=False,  # 40 
            change_line_status=True,  # 8
            change_bus_vect=False,  # 59
            set_topo_vect=True,  # 58
        )
        self.gym_action_space = GymActionSpace(action_space=self.converter)
        ACT_SIZE = len(self.converter.all_actions)
        #print("action space size= ", ACT_SIZE) # 68
        #print("gym_action_space = ", self.gym_action_space) # Dict(action:Discrete(68))
        gym_action = self.gym_action_space.sample()
        #print("sample_action = ", gym_action) # OrderedDict([('action', 34)])
        encoded_action = self.gym_action_space.from_gym(gym_action)  # 34
        self.num_actions = ACT_SIZE

    # grid2op observation -> flat feature tensor (selected keys only)
    def convert_obs(self, obs):
        gym_obs = self.gym_obs_space.to_gym(obs)
        features = []
        for key, val in gym_obs.items():
            if key in self.obs_list:
                features.extend(val)
        return tf.convert_to_tensor(features, dtype=tf.float32)

    # grid action id (int) -> grid2op action
    def id_to_act(self, encoded_act):
        # decode the discrete id into a full grid2op action object
        return self.converter.convert_act(encoded_act)
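
convert_obs keeps only the keys in obs_list and flattens them, so the (39,) shape noted in the comments is grid-dependent. A quick way to see which key contributes what; the environment name is an assumption (a small grid such as rte_case5_example would be consistent with a 39-dimensional vector):

import numpy as np
import grid2op
from grid2op.gym_compat import GymObservationSpace  # import path is an assumption

env = grid2op.make("rte_case5_example")  # assumed: a small toy grid
obs_list = ["prod_p", "prod_v", "load_p", "load_q", "rho", "topo_vect"]

gym_obs = GymObservationSpace(env).to_gym(env.reset())
sizes = {k: np.asarray(v).size for k, v in gym_obs.items() if k in obs_list}
print(sizes, "-> total:", sum(sizes.values()))  # total = length of the flat vector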
Example #5
    def __init__(self,
                 env,
                 units=[32, 32],
                 buffer_size=5000,
                 learning_rate=0.01,
                 init_epsilon=1.,
                 epsilon_decay=0.99,
                 min_epsilon=0.01,
                 gamma=0.98,
                 batch_size=16,
                 target_update_iter=300,
                 train_nums=5000,
                 start_learning=64,
                 max_iter=200):
        # =============== Init Observation Space =================
        self.env = env
        self.gym_obs_space = GymObservationSpace(self.env)
        self.all_obs = []
        obs = self.env.reset()
        gym_obs = self.gym_obs_space.to_gym(obs)
        for key in gym_obs.keys():  # record every available observation key
            self.all_obs.append(key)
        self.obs_list = [
            "prod_p", "prod_v", "load_p", "load_q", "rho", "topo_vect"
        ]
        init_obs = self.convert_obs(obs)
        #print("obs_shape = ", init_obs.shape) # (39,)

        # =============== Init Action Space =================
        self.converter = grid2op.Converter.IdToAct(self.env.action_space)
        # action number 0 = DoNothing
        self.converter.init_converter(
            set_line_status=False,  # 40 
            change_line_status=True,  # 8
            change_bus_vect=False,  # 59
            set_topo_vect=True,  # 58
        )
        self.gym_action_space = GymActionSpace(action_space=self.converter)
        ACT_SIZE = len(self.converter.all_actions)
        #print("action space size= ", ACT_SIZE) # 68
        #print("gym_action_space = ", self.gym_action_space) # Dict(action:Discrete(68))
        gym_action = self.gym_action_space.sample()
        #print("sample_action = ", gym_action) # OrderedDict([('action', 34)])
        encoded_action = self.gym_action_space.from_gym(gym_action)  # 34
        self.num_actions = ACT_SIZE
        '''
        print("=======================alll action ==================")
        for i in range(ACT_SIZE):
            print("action number = ", i)
            print(self.converter.convert_act(i))
        print("buffer_size : ", buffer_size)
        print("env : ", self.env)
        '''
        self.units = units
        DQNAgent.__init__(self,
                          self.env,
                          obs=init_obs,
                          num_actions=ACT_SIZE,
                          buffer_size=buffer_size,
                          learning_rate=learning_rate,
                          init_epsilon=init_epsilon,
                          epsilon_decay=epsilon_decay,
                          min_epsilon=min_epsilon,
                          gamma=gamma,
                          batch_size=batch_size,
                          target_update_iter=target_update_iter,
                          train_nums=train_nums,
                          start_learning=start_learning)
        #print(self.gym_action_space)
        self.max_iter = max_iter
Example #6
class GridAgent(DQNAgent):
    def __init__(self,
                 env,
                 units=[32, 32],
                 buffer_size=5000,
                 learning_rate=0.01,
                 init_epsilon=1.,
                 epsilon_decay=0.99,
                 min_epsilon=0.01,
                 gamma=0.98,
                 batch_size=16,
                 target_update_iter=300,
                 train_nums=5000,
                 start_learning=64,
                 max_iter=200):
        # =============== Init Observation Space =================
        self.env = env
        self.gym_obs_space = GymObservationSpace(self.env)
        self.all_obs = []
        obs = self.env.reset()
        gym_obs = self.gym_obs_space.to_gym(obs)
        for key in gym_obs.keys():  # record every available observation key
            self.all_obs.append(key)
        self.obs_list = [
            "prod_p", "prod_v", "load_p", "load_q", "rho", "topo_vect"
        ]
        init_obs = self.convert_obs(obs)
        #print("obs_shape = ", init_obs.shape) # (39,)

        # =============== Init Action Space =================
        self.converter = grid2op.Converter.IdToAct(self.env.action_space)
        # action number 0 = DoNothing
        self.converter.init_converter(
            set_line_status=False,  # 40 
            change_line_status=True,  # 8
            change_bus_vect=False,  # 59
            set_topo_vect=True,  # 58
        )
        self.gym_action_space = GymActionSpace(action_space=self.converter)
        ACT_SIZE = len(self.converter.all_actions)
        #print("action space size= ", ACT_SIZE) # 68
        #print("gym_action_space = ", self.gym_action_space) # Dict(action:Discrete(68))
        gym_action = self.gym_action_space.sample()
        #print("sample_action = ", gym_action) # OrderedDict([('action', 34)])
        encoded_action = self.gym_action_space.from_gym(gym_action)  # 34
        self.num_actions = ACT_SIZE
        '''
        print("=======================alll action ==================")
        for i in range(ACT_SIZE):
            print("action number = ", i)
            print(self.converter.convert_act(i))
        print("buffer_size : ", buffer_size)
        print("env : ", self.env)
        '''
        self.units = units
        DQNAgent.__init__(self,
                          self.env,
                          obs=init_obs,
                          num_actions=ACT_SIZE,
                          buffer_size=buffer_size,
                          learning_rate=learning_rate,
                          init_epsilon=init_epsilon,
                          epsilon_decay=epsilon_decay,
                          min_epsilon=min_epsilon,
                          gamma=gamma,
                          batch_size=batch_size,
                          target_update_iter=target_update_iter,
                          train_nums=train_nums,
                          start_learning=start_learning)
        #print(self.gym_action_space)
        self.max_iter = max_iter

    # grid2op observation -> flat feature vector (selected keys only)
    def convert_obs(self, obs):
        gym_obs = self.gym_obs_space.to_gym(obs)
        features = []
        for key, val in gym_obs.items():
            if key in self.obs_list:
                features.extend(val)
        return np.array(features)

    # grid action id (int) -> grid2op action
    def id_to_act(self, encoded_act):
        # decode the discrete id into a full grid2op action object
        return self.converter.convert_act(encoded_act)

    def grid_evaluation(self, action_type='network', epi=0):
        obs, done, ep_reward, steps = self.env.reset(), False, 0, 0
        msg = 'SmartGrid Episode {}'.format(epi)
        # 2016 steps = one week at 5-minute resolution
        for t in notebook.tqdm(range(1, 2017), desc=msg):
            converted_obs = self.convert_obs(obs)
            if action_type == "network":
                action = self.model.action_value(converted_obs[None])
            elif action_type == "random":
                action = np.random.randint(self.num_actions)
            elif action_type == "do_nothing":
                action = 0
            else:
                raise ValueError("unknown action_type: {}".format(action_type))
            converted_act = self.id_to_act(action)
            obs, reward, done, _ = self.env.step(converted_act)
            ep_reward += reward
            steps += 1
            if done:
                break
        self.env.close()
        return steps, ep_reward
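
A hypothetical driver for the class above. The environment name is assumed, and GridAgent requires the DQNAgent base class from the example's own codebase; the do-nothing baseline needs no training, so evaluation runs immediately after construction:

import grid2op

env = grid2op.make("rte_case5_example")  # assumed environment name
agent = GridAgent(env)  # needs the DQNAgent base class to be importable
steps, reward = agent.grid_evaluation(action_type="do_nothing", epi=0)
print("survived {} steps, total reward {:.2f}".format(steps, reward))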
Example #7
    def test_to_from_gym_act(self):
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with make("l2rpn_wcci_2020", test=True) as env:
                converter = self.init_converter(env)
                act_space = GymActionSpace(converter)
                act_space.seed(0)
                converter.seed(0)

                gym_act = act_space.sample()
                act = act_space.from_gym(gym_act)
                self._aux_test_json(act_space, gym_act)
                gym_act2 = act_space.to_gym(act)
                act2 = act_space.from_gym(gym_act2)
                g2op_act = converter.convert_act(act)
                g2op_act2 = converter.convert_act(act2)
                assert g2op_act == g2op_act2

                act_space.seed(0)
                for i in range(10):
                    gym_act = act_space.sample()
                    act = act_space.from_gym(gym_act)
                    self._aux_test_json(act_space, gym_act)
                    gym_act2 = act_space.to_gym(act)
                    act2 = act_space.from_gym(gym_act2)
                    g2op_act = converter.convert_act(act)
                    g2op_act2 = converter.convert_act(act2)
                    assert g2op_act == g2op_act2
Example #8
    def test_to_from_gym_act(self):
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with make("l2rpn_wcci_2020", test=True) as env:
                act_space = GymActionSpace(env.action_space)

                act = env.action_space()
                gym_act = act_space.to_gym(act)
                self._aux_test_json(act_space, gym_act)
                assert act_space.contains(gym_act)
                act2 = act_space.from_gym(gym_act)
                assert act == act2

                act_space.seed(0)
                for i in range(10):
                    gym_act = act_space.sample()
                    act = act_space.from_gym(gym_act)
                    self._aux_test_json(act_space, gym_act)
                    gym_act2 = act_space.to_gym(act)
                    act2 = act_space.from_gym(gym_act2)
                    assert act == act2
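
Stripped of the test harness, the invariant Example #8 checks is a lossless to_gym/from_gym round trip. A sketch; the import path is an assumption, while the environment name and calls mirror the test above:

import grid2op
from grid2op.gym_compat import GymActionSpace  # import path is an assumption

with grid2op.make("l2rpn_wcci_2020", test=True) as env:
    act_space = GymActionSpace(env.action_space)
    act = env.action_space()             # builds the do-nothing action
    gym_act = act_space.to_gym(act)      # grid2op action -> gym sample
    assert act_space.contains(gym_act)
    assert act_space.from_gym(gym_act) == act  # lossless round trip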